diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 1c7525b3afa4..23e03043ec60 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -3362,6 +3362,9 @@ A larger number is more efficient up to a limit, after that it will only waste RAM (maximum efficiency is achieved when there is no more than 1 pool per frame). A small number could end up with one pool per descriptor, which negatively impacts performance. [b]Note:[/b] Changing this property requires a restart to take effect. + + Determines whether to automatically generate a reactive mask for FSR 2/3. Setting this to [code]true[/code] will reduce flickering when upscaling transparent objects, at a cost of one more rendering pass. + Determines how sharp the upscaled image will be when using the FSR upscaling mode. Sharpness halves with every whole number. Values go from 0.0 (sharpest) to 2.0. Values above 2.0 won't make a visible difference. diff --git a/doc/classes/RenderSceneBuffersConfiguration.xml b/doc/classes/RenderSceneBuffersConfiguration.xml index b5c37f3288f9..37ee668eef20 100644 --- a/doc/classes/RenderSceneBuffersConfiguration.xml +++ b/doc/classes/RenderSceneBuffersConfiguration.xml @@ -12,6 +12,9 @@ Level of the anisotropic filter. + + If [code]true[/code], automatically generate a reactive mask for FSR 2/3. + FSR Sharpness applicable if FSR upscaling is used. diff --git a/doc/classes/RenderSceneBuffersExtension.xml b/doc/classes/RenderSceneBuffersExtension.xml index fa3efbcaa0ef..20e799e57c2b 100644 --- a/doc/classes/RenderSceneBuffersExtension.xml +++ b/doc/classes/RenderSceneBuffersExtension.xml @@ -23,6 +23,13 @@ Implement this in GDExtension to change the anisotropic filtering level. + + + + + Implement this in GDExtension to record a new FSR auto generate reactive value. 
+ + diff --git a/doc/classes/RenderSceneBuffersRD.xml b/doc/classes/RenderSceneBuffersRD.xml index 6a9445a25edf..6aee5ad0ec83 100644 --- a/doc/classes/RenderSceneBuffersRD.xml +++ b/doc/classes/RenderSceneBuffersRD.xml @@ -90,6 +90,12 @@ If [param msaa] is [code]true[/code] and MSAA is enabled, this returns the MSAA variant of the buffer. + + + + Returns [code]true[/code] if FSR automatically generates a reactive mask during upscaling, [code]false[/code] otherwise. + + diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml index e1f7a53302b8..1ab0f567bfc9 100644 --- a/doc/classes/RenderingServer.xml +++ b/doc/classes/RenderingServer.xml @@ -4098,6 +4098,14 @@ Sets the viewport's environment mode which allows enabling or disabling rendering of 3D environment over 2D canvas. When disabled, 2D will not be affected by the environment. When enabled, 2D will be affected by the environment if the environment background mode is [constant ENV_BG_CANVAS]. The default behavior is to inherit the setting from the viewport's parent. If the topmost parent is also set to [constant VIEWPORT_ENVIRONMENT_INHERIT], then the behavior will be the same as if it was set to [constant VIEWPORT_ENVIRONMENT_ENABLED]. + + + + + + Determines whether to automatically generate a reactive mask for FSR 2/3. Setting this to [code]true[/code] will reduce flickering when upscaling transparent objects, at a cost of one more rendering pass. + + @@ -5100,21 +5108,24 @@ Use bilinear scaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in undersampling while values greater than [code]1.0[/code] will result in supersampling. A value of [code]1.0[/code] disables scaling. - - Use AMD FidelityFX Super Resolution 1.0 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale].
Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling. + + Use AMD FidelityFX Super Resolution 1.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR1. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling. - Use AMD FidelityFX Super Resolution 2.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution. + Use AMD FidelityFX Super Resolution 2.3 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution. + + + Use AMD FidelityFX Super Resolution 3.1 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR3. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR3 at native resolution as a TAA solution. - + Use MetalFX spatial upscaling for the viewport's 3D buffer. 
The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling. [b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS. - + Use MetalFX temporal upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use MetalFX at native resolution as a TAA solution. [b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS. - + Represents the size of the [enum ViewportScaling3DMode] enum. diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml index 7832cc844c86..f90777b4504b 100644 --- a/doc/classes/Viewport.xml +++ b/doc/classes/Viewport.xml @@ -343,6 +343,10 @@ Disable 3D rendering (but keep 2D rendering). + + Determines whether FSR 2/3 runs an additional pass to generate a reactive mask. Enabling this will increase GPU time, but provide better upscaling results for transparent objects. + To control this property on the root viewport, set the [member ProjectSettings.rendering/scaling_3d/fsr_auto_generate_reactive] project setting. + Determines how sharp the upscaled image will be when using the FSR upscaling mode. Sharpness halves with every whole number. Values go from 0.0 (sharpest) to 2.0. Values above 2.0 won't make a visible difference. To control this property on the root viewport, set the [member ProjectSettings.rendering/scaling_3d/fsr_sharpness] project setting.
@@ -559,21 +563,23 @@ Use AMD FidelityFX Super Resolution 2.2 upscaling for the viewport's 3D buffer. The amount of scaling can be set using [member Viewport.scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using FSR2. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use FSR2 at native resolution as a TAA solution. - + + + Use the [url=https://developer.apple.com/documentation/metalfx/mtlfxspatialscaler#overview]MetalFX spatial upscaler[/url] for the viewport's 3D buffer. The amount of scaling can be set using [member scaling_3d_scale]. Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] disables scaling. More information: [url=https://developer.apple.com/documentation/metalfx]MetalFX[/url]. [b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS. - + Use the [url=https://developer.apple.com/documentation/metalfx/mtlfxtemporalscaler#overview]MetalFX temporal upscaler[/url] for the viewport's 3D buffer. The amount of scaling can be set using [member scaling_3d_scale]. To determine the minimum input scale, use the [method RenderingDevice.limit_get] method with [constant RenderingDevice.LIMIT_METALFX_TEMPORAL_SCALER_MIN_SCALE]. Values less than [code]1.0[/code] will result in the viewport being upscaled using MetalFX. Values greater than [code]1.0[/code] are not supported and bilinear downsampling will be used instead. A value of [code]1.0[/code] will use MetalFX at native resolution as a TAA solution. More information: [url=https://developer.apple.com/documentation/metalfx]MetalFX[/url]. [b]Note:[/b] Only supported when the Metal rendering driver is in use, which limits this scaling mode to macOS and iOS. 
- + Represents the size of the [enum Scaling3DMode] enum. diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h index f98454f3504c..b8bda7953cf4 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.h +++ b/drivers/gles3/storage/render_scene_buffers_gles3.h @@ -104,6 +104,7 @@ class RenderSceneBuffersGLES3 : public RenderSceneBuffers { virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) override {} virtual void set_fsr_sharpness(float p_fsr_sharpness) override {} + virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) override {} virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override {} virtual void set_use_debanding(bool p_use_debanding) override {} void set_apply_color_adjustments_in_post(bool p_apply_in_post); diff --git a/editor/scene/3d/node_3d_editor_plugin.cpp b/editor/scene/3d/node_3d_editor_plugin.cpp index 98d6b9f9503e..3c049df0f004 100644 --- a/editor/scene/3d/node_3d_editor_plugin.cpp +++ b/editor/scene/3d/node_3d_editor_plugin.cpp @@ -3164,6 +3164,9 @@ void Node3DEditorViewport::_project_settings_changed() { const float fsr_sharpness = GLOBAL_GET("rendering/scaling_3d/fsr_sharpness"); viewport->set_fsr_sharpness(fsr_sharpness); + const bool fsr_auto_generate_reactive = GLOBAL_GET("rendering/scaling_3d/fsr_auto_generate_reactive"); + viewport->set_fsr_auto_generate_reactive(fsr_auto_generate_reactive); + const float texture_mipmap_bias = GLOBAL_GET("rendering/textures/default_filters/texture_mipmap_bias"); viewport->set_texture_mipmap_bias(texture_mipmap_bias); diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index c61932035ec9..831b6e43bccb 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -4890,6 +4890,21 @@ float Viewport::get_fsr_sharpness() const { return fsr_sharpness; } +void Viewport::set_fsr_auto_generate_reactive(bool 
p_fsr_auto_generate_reactive) { + ERR_MAIN_THREAD_GUARD; + if (fsr_auto_generate_reactive == p_fsr_auto_generate_reactive) { + return; + } + + fsr_auto_generate_reactive = p_fsr_auto_generate_reactive; + RS::get_singleton()->viewport_set_fsr_auto_generate_reactive(viewport, p_fsr_auto_generate_reactive); +} + +bool Viewport::get_fsr_auto_generate_reactive() const { + ERR_READ_THREAD_GUARD_V(false); + return fsr_auto_generate_reactive; +} + void Viewport::set_texture_mipmap_bias(float p_texture_mipmap_bias) { ERR_MAIN_THREAD_GUARD; if (texture_mipmap_bias == p_texture_mipmap_bias) { @@ -5123,6 +5138,9 @@ void Viewport::_bind_methods() { ClassDB::bind_method(D_METHOD("set_fsr_sharpness", "fsr_sharpness"), &Viewport::set_fsr_sharpness); ClassDB::bind_method(D_METHOD("get_fsr_sharpness"), &Viewport::get_fsr_sharpness); + ClassDB::bind_method(D_METHOD("set_fsr_auto_generate_reactive", "fsr_auto_generate_reactive"), &Viewport::set_fsr_auto_generate_reactive); + ClassDB::bind_method(D_METHOD("get_fsr_auto_generate_reactive"), &Viewport::get_fsr_auto_generate_reactive); + ClassDB::bind_method(D_METHOD("set_texture_mipmap_bias", "texture_mipmap_bias"), &Viewport::set_texture_mipmap_bias); ClassDB::bind_method(D_METHOD("get_texture_mipmap_bias"), &Viewport::get_texture_mipmap_bias); @@ -5163,11 +5181,12 @@ void Viewport::_bind_methods() { #ifndef _3D_DISABLED ADD_GROUP("Scaling 3D", ""); - ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); + ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.2 (Fast),FSR 2.3 (Slow),FSR 3.1 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "scaling_3d_scale", PROPERTY_HINT_RANGE, "0.25,2.0,0.01"), "set_scaling_3d_scale", "get_scaling_3d_scale");
ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "texture_mipmap_bias", PROPERTY_HINT_RANGE, "-2,2,0.001"), "set_texture_mipmap_bias", "get_texture_mipmap_bias"); ADD_PROPERTY(PropertyInfo(Variant::INT, "anisotropic_filtering_level", PROPERTY_HINT_ENUM, String::utf8("Disabled (Fastest),2× (Faster),4× (Fast),8× (Average),16x (Slow)")), "set_anisotropic_filtering_level", "get_anisotropic_filtering_level"); ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "fsr_sharpness", PROPERTY_HINT_RANGE, "0,2,0.1"), "set_fsr_sharpness", "get_fsr_sharpness"); + ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fsr_auto_generate_reactive"), "set_fsr_auto_generate_reactive", "get_fsr_auto_generate_reactive"); ADD_GROUP("Variable Rate Shading", "vrs_"); ADD_PROPERTY(PropertyInfo(Variant::INT, "vrs_mode", PROPERTY_HINT_ENUM, "Disabled,Texture,XR"), "set_vrs_mode", "get_vrs_mode"); ADD_PROPERTY(PropertyInfo(Variant::INT, "vrs_update_mode", PROPERTY_HINT_ENUM, "Disabled,Once,Always"), "set_vrs_update_mode", "get_vrs_update_mode"); @@ -5224,6 +5243,7 @@ void Viewport::_bind_methods() { BIND_ENUM_CONSTANT(SCALING_3D_MODE_BILINEAR); BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR); BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR2); + BIND_ENUM_CONSTANT(SCALING_3D_MODE_FSR3); BIND_ENUM_CONSTANT(SCALING_3D_MODE_METALFX_SPATIAL); BIND_ENUM_CONSTANT(SCALING_3D_MODE_METALFX_TEMPORAL); BIND_ENUM_CONSTANT(SCALING_3D_MODE_MAX); @@ -5369,6 +5389,7 @@ Viewport::Viewport() { set_scaling_3d_mode((Viewport::Scaling3DMode)(int)GLOBAL_GET("rendering/scaling_3d/mode")); set_scaling_3d_scale(GLOBAL_GET("rendering/scaling_3d/scale")); set_fsr_sharpness((float)GLOBAL_GET("rendering/scaling_3d/fsr_sharpness")); + set_fsr_auto_generate_reactive((bool)GLOBAL_GET("rendering/scaling_3d/fsr_auto_generate_reactive")); set_texture_mipmap_bias((float)GLOBAL_GET("rendering/textures/default_filters/texture_mipmap_bias")); 
set_anisotropic_filtering_level((Viewport::AnisotropicFiltering)(int)GLOBAL_GET("rendering/textures/default_filters/anisotropic_filtering_level")); #endif // _3D_DISABLED diff --git a/scene/main/viewport.h b/scene/main/viewport.h index cfd99df7fcfa..833b8c825cac 100644 --- a/scene/main/viewport.h +++ b/scene/main/viewport.h @@ -100,6 +100,7 @@ class Viewport : public Node { SCALING_3D_MODE_BILINEAR, SCALING_3D_MODE_FSR, SCALING_3D_MODE_FSR2, + SCALING_3D_MODE_FSR3, SCALING_3D_MODE_METALFX_SPATIAL, SCALING_3D_MODE_METALFX_TEMPORAL, SCALING_3D_MODE_MAX @@ -314,6 +315,7 @@ class Viewport : public Node { Scaling3DMode scaling_3d_mode = SCALING_3D_MODE_BILINEAR; float scaling_3d_scale = 1.0; float fsr_sharpness = 0.2f; + bool fsr_auto_generate_reactive = false; float texture_mipmap_bias = 0.0f; AnisotropicFiltering anisotropic_filtering_level = ANISOTROPY_4X; bool use_debanding = false; @@ -590,6 +592,9 @@ class Viewport : public Node { void set_fsr_sharpness(float p_fsr_sharpness); float get_fsr_sharpness() const; + void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive); + bool get_fsr_auto_generate_reactive() const; + void set_texture_mipmap_bias(float p_texture_mipmap_bias); float get_texture_mipmap_bias() const; diff --git a/servers/rendering/renderer_rd/effects/SCsub b/servers/rendering/renderer_rd/effects/SCsub index 30656a4225b7..329fcc0e32b8 100644 --- a/servers/rendering/renderer_rd/effects/SCsub +++ b/servers/rendering/renderer_rd/effects/SCsub @@ -11,9 +11,11 @@ env_effects = env.Clone() thirdparty_obj = [] -thirdparty_dir = "#thirdparty/amd-fsr2/" -thirdparty_sources = ["ffx_assert.cpp", "ffx_fsr2.cpp"] -thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] +thirdparty_dir = "#thirdparty/amd-ffx/" +thirdparty_sources = thirdparty_dir + "*.cpp" + +if env.dev_build: + env_effects.Append(CPPDEFINES=["FFX_DEBUG"]) env_effects.Prepend(CPPPATH=[thirdparty_dir]) @@ -69,6 +71,7 @@ env.servers_sources += thirdparty_obj module_obj 
= [] env_effects.add_source_files(module_obj, "*.cpp") +env_effects.add_source_files(module_obj, "ffx/*.cpp") if env["metal"]: env_effects.add_source_files(module_obj, "metal_fx.mm") env.servers_sources += module_obj diff --git a/servers/rendering/renderer_rd/effects/ffx/ffx_common.cpp b/servers/rendering/renderer_rd/effects/ffx/ffx_common.cpp new file mode 100644 index 000000000000..97e8d55a91b8 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ffx/ffx_common.cpp @@ -0,0 +1,645 @@ +/**************************************************************************/ +/* ffx_common.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "ffx_common.h" + +#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h" +#include "servers/rendering/rendering_device_commons.h" +#include "thirdparty/amd-ffx/ffx_fsr1.h" +#include "thirdparty/amd-ffx/ffx_fsr2.h" +#include "thirdparty/amd-ffx/ffx_fsr3upscaler.h" + +using namespace RendererRD; + +RD::TextureType FFXCommon::ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) { + switch (p_type) { + case FFX_RESOURCE_TYPE_TEXTURE1D: + return RD::TEXTURE_TYPE_1D; + case FFX_RESOURCE_TYPE_TEXTURE2D: + return RD::TEXTURE_TYPE_2D; + case FFX_RESOURCE_TYPE_TEXTURE3D: + return RD::TEXTURE_TYPE_3D; + default: +#ifdef DEV_ENABLED + ERR_PRINT("Unknown FFX resource type."); +#endif + return RD::TEXTURE_TYPE_MAX; + } +} + +FfxResourceType FFXCommon::rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) { + switch (p_type) { + case RD::TEXTURE_TYPE_1D: + return FFX_RESOURCE_TYPE_TEXTURE1D; + case RD::TEXTURE_TYPE_2D: + return FFX_RESOURCE_TYPE_TEXTURE2D; + case RD::TEXTURE_TYPE_3D: + return FFX_RESOURCE_TYPE_TEXTURE3D; + default: +#ifdef DEV_ENABLED + ERR_PRINT("Unknown RD texture type."); +#endif + return FFX_RESOURCE_TYPE_BUFFER; + } +} + +RD::DataFormat FFXCommon::ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) { + switch (p_format) { + case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS: + return RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT: + return RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT: + return RD::DATA_FORMAT_R16G16B16A16_SFLOAT; + case FFX_SURFACE_FORMAT_R32G32_FLOAT: + return
RD::DATA_FORMAT_R32G32_SFLOAT; + case FFX_SURFACE_FORMAT_R32_UINT: + return RD::DATA_FORMAT_R32_UINT; + case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS: + return RD::DATA_FORMAT_R8G8B8A8_UNORM; + case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM: + return RD::DATA_FORMAT_R8G8B8A8_UNORM; + case FFX_SURFACE_FORMAT_R11G11B10_FLOAT: + return RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32; + case FFX_SURFACE_FORMAT_R16G16_FLOAT: + return RD::DATA_FORMAT_R16G16_SFLOAT; + case FFX_SURFACE_FORMAT_R16G16_UINT: + return RD::DATA_FORMAT_R16G16_UINT; + case FFX_SURFACE_FORMAT_R16_FLOAT: + return RD::DATA_FORMAT_R16_SFLOAT; + case FFX_SURFACE_FORMAT_R16_UINT: + return RD::DATA_FORMAT_R16_UINT; + case FFX_SURFACE_FORMAT_R16_UNORM: + return RD::DATA_FORMAT_R16_UNORM; + case FFX_SURFACE_FORMAT_R16_SNORM: + return RD::DATA_FORMAT_R16_SNORM; + case FFX_SURFACE_FORMAT_R8_UNORM: + return RD::DATA_FORMAT_R8_UNORM; + case FFX_SURFACE_FORMAT_R8_UINT: + return RD::DATA_FORMAT_R8_UINT; + case FFX_SURFACE_FORMAT_R8G8_UNORM: + return RD::DATA_FORMAT_R8G8_UNORM; + case FFX_SURFACE_FORMAT_R32_FLOAT: + return RD::DATA_FORMAT_R32_SFLOAT; + default: +#ifdef DEV_ENABLED + ERR_PRINT("Unknown FFX surface format."); +#endif + return RD::DATA_FORMAT_MAX; + } +} + +FfxSurfaceFormat FFXCommon::rd_format_to_ffx_surface_format(RD::DataFormat p_format) { + switch (p_format) { + case RD::DATA_FORMAT_R32G32B32A32_SFLOAT: + return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT; + case RD::DATA_FORMAT_R16G16B16A16_SFLOAT: + return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT; + case RD::DATA_FORMAT_R32G32_SFLOAT: + return FFX_SURFACE_FORMAT_R32G32_FLOAT; + case RD::DATA_FORMAT_R32_UINT: + return FFX_SURFACE_FORMAT_R32_UINT; + case RD::DATA_FORMAT_R8G8B8A8_UNORM: + return FFX_SURFACE_FORMAT_R8G8B8A8_UNORM; + case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32: + return FFX_SURFACE_FORMAT_R11G11B10_FLOAT; + case RD::DATA_FORMAT_R16G16_SFLOAT: + return FFX_SURFACE_FORMAT_R16G16_FLOAT; + case RD::DATA_FORMAT_R16G16_UINT: + return FFX_SURFACE_FORMAT_R16G16_UINT;
+ case RD::DATA_FORMAT_R16_SFLOAT: + return FFX_SURFACE_FORMAT_R16_FLOAT; + case RD::DATA_FORMAT_R16_UINT: + return FFX_SURFACE_FORMAT_R16_UINT; + case RD::DATA_FORMAT_R16_UNORM: + return FFX_SURFACE_FORMAT_R16_UNORM; + case RD::DATA_FORMAT_R16_SNORM: + return FFX_SURFACE_FORMAT_R16_SNORM; + case RD::DATA_FORMAT_R8_UNORM: + return FFX_SURFACE_FORMAT_R8_UNORM; + case RD::DATA_FORMAT_R8_UINT: + return FFX_SURFACE_FORMAT_R8_UINT; + case RD::DATA_FORMAT_R8G8_UNORM: + return FFX_SURFACE_FORMAT_R8G8_UNORM; + case RD::DATA_FORMAT_R32_SFLOAT: + return FFX_SURFACE_FORMAT_R32_FLOAT; + default: + return FFX_SURFACE_FORMAT_UNKNOWN; + } +} + +static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) { + uint32_t ret = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; + + if (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) { + ret |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; + } + + if (p_flags & FFX_RESOURCE_USAGE_UAV) { + ret |= RD::TEXTURE_USAGE_STORAGE_BIT; + ret |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + ret |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + } + + return ret; +} + +static FfxVersionNumber get_sdk_version_rd(FfxInterface *p_backend_interface) { + return FFX_SDK_MAKE_VERSION(FFX_SDK_VERSION_MAJOR, FFX_SDK_VERSION_MINOR, FFX_SDK_VERSION_PATCH); +} + +static FfxErrorCode create_backend_context_rd(FfxInterface *p_backend_interface, FfxEffect p_effect, + FfxEffectBindlessConfig *p_bindless_config, FfxUInt32 *p_effect_context_id) { + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + + if (p_bindless_config) { + WARN_PRINT_ONCE("Fidelity FX: Bindless resources are not supported in Godot."); + } + + // Store pointer to the device common to all contexts.
+ scratch.device = p_backend_interface->device; + scratch.effect_type = p_effect; + scratch.staging_constant_buffer = (uint8_t *)memalloc(FFX_STAGING_CONSTANT_BUFFER_SIZE); + ERR_FAIL_NULL_V(scratch.staging_constant_buffer, FFX_ERROR_OUT_OF_MEMORY); + + // Create a ring buffer of uniform buffers. + // FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO. + for (uint32_t i = 0; i < FFX_UBO_RING_BUFFER_SIZE; i++) { + scratch.ubo_ring_buffer[i] = RD::get_singleton()->uniform_buffer_create(FFX_BUFFER_SIZE); + ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR); + } + + return FFX_OK; +} + +static FfxErrorCode get_device_capabilities_rd(FfxInterface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities) { + *p_out_device_capabilities = FFXCommon::get_device_capabilities(); + + return FFX_OK; +} + +static FfxErrorCode destroy_backend_context_rd(FfxInterface *p_backend_interface, FfxUInt32 effect_context_id) { + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + if (scratch.staging_constant_buffer) { + memfree(scratch.staging_constant_buffer); + } + + for (uint32_t i = 0; i < FFX_UBO_RING_BUFFER_SIZE; i++) { + RD::get_singleton()->free_rid(scratch.ubo_ring_buffer[i]); + } + + return FFX_OK; +} + +static FfxErrorCode create_resource_rd(FfxInterface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxUInt32 effect_context_id, FfxResourceInternal *p_out_resource) { + // FSR2's base implementation won't issue a call to create a heap type that isn't just default on its own, + // so we can safely ignore it as RD does not expose this concept.
+ ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT); + + RenderingDevice *rd = RD::get_singleton(); + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + FfxResourceDescription res_desc = p_create_resource_description->resourceDescription; + + // FSR2's base implementation never requests buffer creation. + ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT); + + if (res_desc.mipCount == 0) { + // Mipmap count must be derived from the resource's dimensions. + res_desc.mipCount = uint32_t(1 + std::floor(std::log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth)))); + } + + Vector<PackedByteArray> initial_data; + if (p_create_resource_description->initData.type != FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED) { + PackedByteArray byte_array; + byte_array.resize(p_create_resource_description->initData.size); + switch (p_create_resource_description->initData.type) { + case FFX_RESOURCE_INIT_DATA_TYPE_BUFFER: + memcpy(byte_array.ptrw(), p_create_resource_description->initData.buffer, p_create_resource_description->initData.size); + break; + case FFX_RESOURCE_INIT_DATA_TYPE_VALUE: + memset(byte_array.ptrw(), p_create_resource_description->initData.value, p_create_resource_description->initData.size); // initData.value is a single fill byte, so replicate it instead of copying initData.size bytes from its address. + break; + default: + ERR_PRINT("Invalid initial data type. 
"); + break; + } + initial_data.push_back(byte_array); + } + + RD::TextureFormat texture_format; + texture_format.texture_type = FFXCommon::ffx_resource_type_to_rd_texture_type(res_desc.type); + texture_format.format = FFXCommon::ffx_surface_format_to_rd_format(res_desc.format); + texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->resourceDescription.usage); + texture_format.width = res_desc.width; + texture_format.height = res_desc.height; + texture_format.depth = res_desc.depth; + texture_format.mipmaps = res_desc.mipCount; + texture_format.is_discardable = true; + + RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data); + ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR); + + rd->set_resource_name(texture, String(p_create_resource_description->name)); + + // Add the resource to the storage and use the internal index to reference it. + p_out_resource->internalIndex = scratch.resources.add(texture, false, p_create_resource_description->id, res_desc); + + return FFX_OK; +} + +static FfxErrorCode register_resource_rd(FfxInterface *p_backend_interface, const FfxResource *p_in_resource, FfxUInt32 effect_context_id, FfxResourceInternal *p_out_resource) { + if (p_in_resource->resource == nullptr) { + // Null resource case. + p_out_resource->internalIndex = -1; + return FFX_OK; + } + + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + const RID &rid = *reinterpret_cast<const RID *>(p_in_resource->resource); + ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); + + // Add the resource to the storage and use the internal index to reference it.
+ p_out_resource->internalIndex = scratch.resources.add(rid, true, FFXCommon::RESOURCE_ID_DYNAMIC, p_in_resource->description); + + return FFX_OK; +} + +static FfxErrorCode unregister_resources_rd(FfxInterface *p_backend_interface, FfxCommandList p_command_list, FfxUInt32 effect_context_id) { + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + LocalVector<uint32_t> dynamic_list_copy = scratch.resources.dynamic_list; + for (uint32_t i : dynamic_list_copy) { + scratch.resources.remove(i); + } + + return FFX_OK; +} + +static FfxResourceDescription get_resource_description_rd(FfxInterface *p_backend_interface, FfxResourceInternal p_resource) { + if (p_resource.internalIndex != -1) { + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + return scratch.resources.descriptions[p_resource.internalIndex]; + } else { + return {}; + } +} + +static FfxErrorCode destroy_resource_rd(FfxInterface *p_backend_interface, FfxResourceInternal p_resource, FfxUInt32 effect_context_id) { + if (p_resource.internalIndex != -1) { + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + if (scratch.resources.rids[p_resource.internalIndex].is_valid()) { + RD::get_singleton()->free_rid(scratch.resources.rids[p_resource.internalIndex]); + scratch.resources.remove(p_resource.internalIndex); + } + } + + return FFX_OK; +} + +static FfxErrorCode create_pipeline_rd(FfxInterface *p_backend_interface, FfxEffect p_effect, FfxPass p_pass, uint32_t p_permutation_options, const FfxPipelineDescription *p_pipeline_description, FfxUInt32 p_effect_context_id, FfxPipelineState *p_out_pipeline) { + FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer); + FFXCommon::Device &device = *reinterpret_cast<FFXCommon::Device *>(scratch.device); + + FFXCommon::Pass &effect_pass = device.passes[p_pass]; + + if (effect_pass.pipeline.pipeline_rid.is_null()) { + // Create pipeline for the device if it hasn't been created
yet. + effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant); + ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR); + + effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid); + ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR); + } + +#ifdef DEV_ENABLED + memcpy(p_out_pipeline->name, p_pipeline_description->name, sizeof(p_out_pipeline->name)); +#endif + + // While this is not their intended use, we use the pipeline and root signature pointers to store the + // RIDs to the pipeline and shader that RD needs for the compute pipeline. + p_out_pipeline->pipeline = reinterpret_cast(&effect_pass.pipeline); + p_out_pipeline->rootSignature = reinterpret_cast(&effect_pass.root_signature); + + // FSR doesn't use any buffers + p_out_pipeline->srvBufferCount = 0; + p_out_pipeline->srvTextureCount = effect_pass.sampled_texture_bindings.size(); + ERR_FAIL_COND_V(p_out_pipeline->srvTextureCount + p_out_pipeline->srvBufferCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE); + memcpy(p_out_pipeline->srvTextureBindings, effect_pass.sampled_texture_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvTextureCount); + + // FSR doesn't use any buffers + p_out_pipeline->uavBufferCount = 0; + p_out_pipeline->uavTextureCount = effect_pass.storage_texture_bindings.size(); + ERR_FAIL_COND_V(p_out_pipeline->uavTextureCount + p_out_pipeline->uavBufferCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE); + memcpy(p_out_pipeline->uavTextureBindings, effect_pass.storage_texture_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavTextureCount); + + p_out_pipeline->constCount = effect_pass.uniform_bindings.size(); + ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE); + 
memcpy(p_out_pipeline->constantBufferBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount); + + if (p_effect == FFX_EFFECT_FSR2) { + bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0; + + if (p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) { + // Change the binding for motion vectors in this particular pass if low resolution MVs are used. + if (low_resolution_mvs) { + FfxResourceBinding &binding = p_out_pipeline->srvTextureBindings[2]; + wcscpy_s(binding.name, L"r_dilated_motion_vectors"); + } + } + } + + if (p_effect == FFX_EFFECT_FSR3UPSCALER) { + bool low_resolution_mvs = (p_pipeline_description->contextFlags & FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0; + + if (p_pass == FFX_FSR3UPSCALER_PASS_ACCUMULATE || p_pass == FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN) { + // Change the binding for motion vectors in this particular pass if low resolution MVs are used. + if (low_resolution_mvs) { + FfxResourceBinding &binding = p_out_pipeline->srvTextureBindings[2]; + wcscpy_s(binding.name, L"r_dilated_motion_vectors"); + } + } + } + + return FFX_OK; +} + +static FfxErrorCode destroy_pipeline_rd(FfxInterface *p_backend_interface, FfxPipelineState *p_pipeline, FfxUInt32 p_effect_context_id) { + // We don't want to destroy pipelines when the FSR2 API deems it necessary as it'll do so whenever the context is destroyed. 
+
+	return FFX_OK;
+}
+
+// Queues a copy of the job description in the scratch storage; queued jobs are run by execute_gpu_jobs_rd().
+static FfxErrorCode schedule_gpu_job_rd(FfxInterface *p_backend_interface, const FfxGpuJobDescription *p_job) {
+	ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT);
+	ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT);
+
+	FFXCommon::Scratch &scratch = *reinterpret_cast<FFXCommon::Scratch *>(p_backend_interface->scratchBuffer);
+	scratch.gpu_jobs.push_back(*p_job);
+
+	return FFX_OK;
+}
+
+// Clears all mip levels of the target texture to the color stored in the job.
+// Buffer resources are rejected with FFX_ERROR_INVALID_ARGUMENT.
+static FfxErrorCode execute_gpu_job_clear_float_rd(FFXCommon::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job, FfxUInt32 p_effect_context_id) {
+	RID resource = p_scratch.resources.rids[p_job.target.internalIndex];
+	FfxResourceDescription &desc = p_scratch.resources.descriptions[p_job.target.internalIndex];
+
+	ERR_FAIL_COND_V_MSG(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT, "Cannot clear a buffer resource.");
+
+	Color color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]);
+	RD::get_singleton()->texture_clear(resource, color, 0, desc.mipCount, 0, 1);
+
+	return FFX_OK;
+}
+
+// Copies every mip level of the source texture into the same mip level of the destination texture.
+// Buffer resources (on either side) are rejected with FFX_ERROR_INVALID_ARGUMENT.
+static FfxErrorCode execute_gpu_job_copy_rd(FFXCommon::Scratch &p_scratch, const FfxCopyJobDescription &p_job, FfxUInt32 p_effect_context_id) {
+	RID src = p_scratch.resources.rids[p_job.src.internalIndex];
+	RID dst = p_scratch.resources.rids[p_job.dst.internalIndex];
+	FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex];
+	FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex];
+
+	ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
+	ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT);
+
+	for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) {
+		// The description stores the extent of mip 0. Every further level is half the size of the previous
+		// one (never below one texel), so the copied region has to shrink together with the mip level.
+		// NOTE(review): RenderingDevice::texture_copy() is expected to reject regions larger than the
+		// selected mip level - confirm against the RenderingDevice implementation.
+		const uint32_t mip_width = MAX(1u, src_desc.width >> mip_level);
+		const uint32_t mip_height = MAX(1u, src_desc.height >> mip_level);
+		const uint32_t mip_depth = MAX(1u, src_desc.depth >> mip_level);
+		RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(mip_width, mip_height, mip_depth), mip_level, mip_level, 0, 0);
+	}
+
+	return FFX_OK;
+}
+
+static FfxErrorCode 
execute_gpu_job_compute_rd(FFXCommon::Scratch &p_scratch, const FfxComputeJobDescription &p_job, FfxUInt32 p_effect_context_id) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL_V(uniform_set_cache, FFX_ERROR_BACKEND_API_ERROR); + + FFXCommon::RootSignature &root_signature = *reinterpret_cast(p_job.pipeline.rootSignature); + ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); + + FFXCommon::Pipeline &backend_pipeline = *reinterpret_cast(p_job.pipeline.pipeline); + ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); + + thread_local LocalVector compute_uniforms; + compute_uniforms.clear(); + + for (uint32_t i = 0; i < p_job.pipeline.srvTextureCount; i++) { + RID texture_rid = p_scratch.resources.rids[p_job.srvTextures[i].resource.internalIndex]; + RD::Uniform texture_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvTextureBindings[i].slotIndex, texture_rid); + compute_uniforms.push_back(texture_uniform); + } + + ERR_FAIL_COND_V_MSG(p_job.pipeline.srvBufferCount > 0, FFX_ERROR_BACKEND_API_ERROR, "Since FSR doesn't use buffers, SRV buffers are not supported."); + + for (uint32_t i = 0; i < p_job.pipeline.uavTextureCount; i++) { + RID image_rid = p_scratch.resources.rids[p_job.uavTextures[i].resource.internalIndex]; + RD::Uniform storage_uniform; + storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE; + storage_uniform.binding = p_job.pipeline.uavTextureBindings[i].slotIndex; + + int mipCount = p_scratch.resources.descriptions[p_job.uavTextures[i].resource.internalIndex].mipCount; + if (mipCount > 1) { + LocalVector &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavTextures[i].resource.internalIndex]; + if (mip_slice_rids.is_empty()) { + mip_slice_rids.resize(mipCount); + } + + ERR_FAIL_COND_V(p_job.uavTextures[i].mip >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT); + + if (mip_slice_rids[p_job.uavTextures[i].mip].is_null()) { + 
mip_slice_rids[p_job.uavTextures[i].mip] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavTextures[i].mip); + } + + ERR_FAIL_COND_V(mip_slice_rids[p_job.uavTextures[i].mip].is_null(), FFX_ERROR_BACKEND_API_ERROR); + + storage_uniform.append_id(mip_slice_rids[p_job.uavTextures[i].mip]); + } else { + storage_uniform.append_id(image_rid); + } + + compute_uniforms.push_back(storage_uniform); + } + + ERR_FAIL_COND_V_MSG(p_job.pipeline.uavBufferCount > 0, FFX_ERROR_BACKEND_API_ERROR, "Since FSR doesn't use buffers, UAV buffers are not supported."); + + for (uint32_t i = 0; i < p_job.pipeline.constCount; i++) { + RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index]; + p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FFX_UBO_RING_BUFFER_SIZE; + + RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].num32BitEntries * sizeof(uint32_t), p_job.cbs[i].data); + + RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.constantBufferBindings[i].slotIndex, buffer_rid); + compute_uniforms.push_back(buffer_uniform); + } + + FFXCommon::Device &device = *reinterpret_cast(p_scratch.device); + + if (p_scratch.effect_type == FFX_EFFECT_FSR1) { + RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 100, device.linear_clamp_sampler); + compute_uniforms.push_back(u_linear_clamp_sampler); + } else if (p_scratch.effect_type == FFX_EFFECT_FSR2 || p_scratch.effect_type == FFX_EFFECT_FSR3UPSCALER) { + RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 100, device.point_clamp_sampler); + RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 101, device.linear_clamp_sampler); + compute_uniforms.push_back(u_point_clamp_sampler); + compute_uniforms.push_back(u_linear_clamp_sampler); + } + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + 
RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid); + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 0, compute_uniforms), 0); + RD::get_singleton()->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]); + RD::get_singleton()->compute_list_end(); + + return FFX_OK; +} + +static FfxErrorCode execute_gpu_jobs_rd(FfxInterface *p_backend_interface, FfxCommandList p_command_list, FfxUInt32 p_effect_context_id) { + ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT); + + FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); + FfxErrorCode error_code = FFX_OK; + for (const FfxGpuJobDescription &job : scratch.gpu_jobs) { + switch (job.jobType) { + case FFX_GPU_JOB_CLEAR_FLOAT: { + error_code = execute_gpu_job_clear_float_rd(scratch, job.clearJobDescriptor, p_effect_context_id); + } break; + case FFX_GPU_JOB_COPY: { + error_code = execute_gpu_job_copy_rd(scratch, job.copyJobDescriptor, p_effect_context_id); + } break; + case FFX_GPU_JOB_COMPUTE: { + error_code = execute_gpu_job_compute_rd(scratch, job.computeJobDescriptor, p_effect_context_id); + } break; + case FFX_GPU_JOB_DISCARD: { + // Discard is a DX12-only concept, so nothing has to be done. + // The DX12 backend handles this automatically. 
+ } break; + default: { + error_code = FFX_ERROR_INVALID_ARGUMENT; + } break; + } + + if (error_code != FFX_OK) { + scratch.gpu_jobs.clear(); +#ifdef DEV_ENABLED + ERR_PRINT(vformat("FFX GPU job failed with code %d", error_code)); +#endif + return error_code; + } + } + + scratch.gpu_jobs.clear(); + + return FFX_OK; +} + +static FfxErrorCode stage_constant_buffer_data_rd(FfxInterface *p_backend_interface, void *p_data, FfxUInt32 p_size, FfxConstantBuffer *p_constant_buffer) { + ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_POINTER); + ERR_FAIL_NULL_V(p_data, FFX_ERROR_INVALID_POINTER); + ERR_FAIL_NULL_V(p_constant_buffer, FFX_ERROR_INVALID_POINTER); + + FFXCommon::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); + if (scratch.staging_constant_buffer_base + FFX_ALIGN_UP(p_size, 256) >= FFX_STAGING_CONSTANT_BUFFER_SIZE) { + scratch.staging_constant_buffer_base = 0; + } + + void *dst = scratch.staging_constant_buffer + scratch.staging_constant_buffer_base; + memcpy(dst, p_data, p_size); + + p_constant_buffer->data = (uint32_t *)dst; + p_constant_buffer->num32BitEntries = p_size / sizeof(uint32_t); + scratch.staging_constant_buffer_base += FFX_ALIGN_UP(p_size, 256); + + return FFX_OK; +} + +FfxResource FFXCommon::get_resource_rd(RID *p_rid, const wchar_t *p_name) { + FfxResource res = {}; + if (p_rid->is_null()) { + return res; + } + + wcscpy_s(res.name, p_name); + + RD::TextureFormat texture_format = RD::get_singleton()->texture_get_format(*p_rid); + res.description.type = rd_texture_type_to_ffx_resource_type(texture_format.texture_type); + res.description.format = rd_format_to_ffx_surface_format(texture_format.format); + res.description.width = texture_format.width; + res.description.height = texture_format.height; + res.description.depth = texture_format.depth; + res.description.mipCount = texture_format.mipmaps; + res.description.flags = FFX_RESOURCE_FLAGS_NONE; + res.resource = reinterpret_cast(p_rid); + + return res; +} + 
+FfxDeviceCapabilities FFXCommon::get_device_capabilities() { // Builds the capability description shared by the FFX effects.
+	FfxDeviceCapabilities capabilities = {};
+	capabilities.maximumSupportedShaderModel = FFX_SHADER_MODEL_6_7;
+	capabilities.waveLaneCountMin = 32; // Fixed value, not queried from the device.
+	capabilities.waveLaneCountMax = 32; // Fixed value, not queried from the device.
+	capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_HALF_FLOAT); // The only field read from RenderingDevice.
+	capabilities.raytracingSupported = false;
+
+	return capabilities;
+}
+// Fills p_interface with the RenderingDevice backend callbacks and attaches the scratch storage and the device to it.
+void FFXCommon::create_ffx_interface(FfxInterface *p_interface, Scratch *p_scratch, Device *p_device) {
+	p_interface->fpGetSDKVersion = get_sdk_version_rd;
+	p_interface->fpCreateBackendContext = create_backend_context_rd;
+	p_interface->fpGetDeviceCapabilities = get_device_capabilities_rd;
+	p_interface->fpDestroyBackendContext = destroy_backend_context_rd;
+	p_interface->fpCreateResource = create_resource_rd;
+	p_interface->fpRegisterResource = register_resource_rd;
+	p_interface->fpUnregisterResources = unregister_resources_rd;
+	p_interface->fpGetResourceDescription = get_resource_description_rd;
+	p_interface->fpDestroyResource = destroy_resource_rd;
+	p_interface->fpCreatePipeline = create_pipeline_rd;
+	p_interface->fpDestroyPipeline = destroy_pipeline_rd;
+	p_interface->fpScheduleGpuJob = schedule_gpu_job_rd;
+	p_interface->fpExecuteGpuJobs = execute_gpu_jobs_rd;
+	p_interface->fpStageConstantBufferDataFunc = stage_constant_buffer_data_rd;
+	p_interface->scratchBuffer = p_scratch; // The callbacks cast this pointer back to FFXCommon::Scratch.
+	p_interface->scratchBufferSize = sizeof(*p_scratch);
+
+	p_interface->device = p_device;
+}
+// Creates a sampler that uses the given filter for magnification and minification and clamps to edge on all axes.
+RID FFXCommon::create_clamp_sampler(RD::SamplerFilter filter) {
+	RD::SamplerState state;
+	state.mag_filter = filter;
+	state.min_filter = filter;
+	state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+	state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+	state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+	state.min_lod = -1000.0f;
+	state.max_lod = 1000.0f;
+	state.anisotropy_max = 1.0;
+
+	RID sampler = RD::get_singleton()->sampler_create(state);
ERR_FAIL_COND_V(sampler.is_null(), RID()); + return sampler; +} diff --git a/servers/rendering/renderer_rd/effects/fsr2.h b/servers/rendering/renderer_rd/effects/ffx/ffx_common.h similarity index 63% rename from servers/rendering/renderer_rd/effects/fsr2.h rename to servers/rendering/renderer_rd/effects/ffx/ffx_common.h index 6554588df5cb..05aee97fd81b 100644 --- a/servers/rendering/renderer_rd/effects/fsr2.h +++ b/servers/rendering/renderer_rd/effects/ffx/ffx_common.h @@ -1,5 +1,5 @@ /**************************************************************************/ -/* fsr2.h */ +/* ffx_common.h */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -30,14 +30,9 @@ #pragma once -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl.gen.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/pipeline_deferred_rd.h" +#include "servers/rendering/renderer_rd/shader_rd.h" +#include "servers/rendering/rendering_server.h" // This flag doesn't actually control anything GCC specific in FSR2. It determines // if symbols should be exported, which is not required for Godot. 
@@ -45,15 +40,18 @@ #define FFX_GCC #endif -#include "thirdparty/amd-fsr2/ffx_fsr2.h" +#include "thirdparty/amd-ffx/ffx_interface.h" -#define FSR2_MAX_QUEUED_FRAMES (4) -#define FSR2_MAX_UNIFORM_BUFFERS (4) -#define FSR2_MAX_BUFFERED_DESCRIPTORS (FFX_FSR2_PASS_COUNT * FSR2_MAX_QUEUED_FRAMES) -#define FSR2_UBO_RING_BUFFER_SIZE (FSR2_MAX_BUFFERED_DESCRIPTORS * FSR2_MAX_UNIFORM_BUFFERS) +#define FFX_UBO_RING_BUFFER_SIZE (FFX_MAX_QUEUED_FRAMES * FFX_MAX_NUM_CONST_BUFFERS * FFX_MAX_PASS_COUNT) +// FFX defines really large size, but we don't need that much. +// So we redefine a size considering the maximum size of uniform buffers +// that FSR effects supported by Godot actually uses. +#define FFX_STAGING_CONSTANT_BUFFER_SIZE (FFX_MAX_QUEUED_FRAMES * 2048) namespace RendererRD { -class FSR2Context { + +// Helper class for Fidelity FX interop. +class FFXCommon { public: enum ResourceID : uint32_t { RESOURCE_ID_DYNAMIC = 0xFFFFFFFF @@ -109,23 +107,6 @@ class FSR2Context { } }; - struct Scratch { - Resources resources; - LocalVector gpu_jobs; - RID ubo_ring_buffer[FSR2_UBO_RING_BUFFER_SIZE]; - uint32_t ubo_ring_buffer_index = 0; - FfxDevice device = nullptr; - }; - - Scratch scratch; - FfxFsr2Context fsr_context; - FfxFsr2ContextDescription fsr_desc; - - ~FSR2Context(); -}; - -class FSR2Effect { -public: struct RootSignature { // Proxy structure to store the shader required by RD that uses the terminology used by the FSR2 API. 
RID shader_rid; @@ -141,56 +122,40 @@ class FSR2Effect { RootSignature root_signature; uint32_t shader_variant = 0; Pipeline pipeline; - Vector sampled_bindings; - Vector storage_bindings; + Vector sampled_texture_bindings; + Vector storage_texture_bindings; Vector uniform_bindings; }; struct Device { - Pass passes[FFX_FSR2_PASS_COUNT]; - FfxDeviceCapabilities capabilities; RID point_clamp_sampler; RID linear_clamp_sampler; - }; + Pass passes[FFX_MAX_PASS_COUNT]; + } device; - struct Parameters { - FSR2Context *context; - Size2i internal_size; - RID color; - RID depth; - RID velocity; - RID reactive; - RID exposure; - RID output; - float z_near = 0.0f; - float z_far = 0.0f; - float fovy = 0.0f; - Vector2 jitter; - float delta_time = 0.0f; - float sharpness = 0.0f; - bool reset_accumulation = false; - Projection reprojection; + struct Scratch { + FfxEffect effect_type; + Resources resources; + LocalVector gpu_jobs; + // Uniform ring buffer + RID ubo_ring_buffer[FFX_UBO_RING_BUFFER_SIZE]; + uint32_t ubo_ring_buffer_index = 0; + // Staging buffer for constant buffer data. + uint8_t *staging_constant_buffer; + size_t staging_constant_buffer_base = 0; + // Pointer to the device common to all contexts. + // Static functions cannot access class members, so we store it here. 
+ FfxDevice device; }; - FSR2Effect(); - ~FSR2Effect(); - FSR2Context *create_context(Size2i p_internal_size, Size2i p_target_size); - void upscale(const Parameters &p_params); - -private: - struct { - Fsr2DepthClipPassShaderRD depth_clip; - Fsr2ReconstructPreviousDepthPassShaderRD reconstruct_previous_depth; - Fsr2LockPassShaderRD lock; - Fsr2AccumulatePassShaderRD accumulate; - Fsr2AccumulatePassShaderRD accumulate_sharpen; - Fsr2RcasPassShaderRD rcas; - Fsr2ComputeLuminancePyramidPassShaderRD compute_luminance_pyramid; - Fsr2AutogenReactivePassShaderRD autogen_reactive; - Fsr2TcrAutogenPassShaderRD tcr_autogen; - } shaders; - - Device device; -}; + static FfxDeviceCapabilities get_device_capabilities(); + static void create_ffx_interface(FfxInterface *p_interface, Scratch *p_scratch, Device *p_device); + static RID create_clamp_sampler(RD::SamplerFilter filter); -} // namespace RendererRD + static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name); + static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type); + static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type); + static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format); + static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format); +}; +} //namespace RendererRD diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr1.cpp b/servers/rendering/renderer_rd/effects/ffx/fsr1.cpp new file mode 100644 index 000000000000..eb34961db8d2 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ffx/fsr1.cpp @@ -0,0 +1,164 @@ +/**************************************************************************/ +/* fsr1.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors 
(see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "fsr1.h" + +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" +#include "servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h" + +using namespace RendererRD; + +FSR1Context::~FSR1Context() { + ffxFsr1ContextDestroy(&fsr_context); +} + +void FSR1Effect::ensure_context(Ref p_render_buffers) { + p_render_buffers->ensure_fsr1(this); +} + +FSR1Effect::FSR1Effect() { + FfxDeviceCapabilities capabilities = FFXCommon::get_device_capabilities(); + + String general_defines = + "\n#define FFX_GPU\n" + "\n#define FFX_GLSL 1\n"; + + Vector modes_with_fp16; + modes_with_fp16.push_back(""); + modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + + // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and + // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL + // files included in FSR1 and mapping the macro bindings (#define FSR1_BIND_*) to their respective implementation names. + // + // It is not guaranteed these will remain consistent at all between versions of FSR1, so it'll be necessary to keep these + // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an + // error if the bindings do not match. 
+ + { + Vector easu_modes_with_fp16; + easu_modes_with_fp16.push_back("\n"); + easu_modes_with_fp16.push_back("\n#define FFX_FSR1_OPTION_APPLY_RCAS 1\n"); + easu_modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + easu_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR1_OPTION_APPLY_RCAS 1\n"); + + FFXCommon::Pass &pass = device.passes[FFX_FSR1_PASS_EASU]; + pass.shader = &shaders.easu; + pass.shader->initialize(easu_modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 2 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_color" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 1, 0, 0, L"rw_internal_upscaled_color" }, + FfxResourceBinding{ 2, 0, 0, L"rw_upscaled_output" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, 0, L"cbFSR1" } + }; + + // EASU RCAS pass is a clone of the EASU pass with the RCAS variant. + FFXCommon::Pass &easu_rcas_pass = device.passes[FFX_FSR1_PASS_EASU_RCAS]; + easu_rcas_pass = pass; + easu_rcas_pass.shader_variant = pass.shader_variant + 1; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR1_PASS_RCAS]; + pass.shader = &shaders.rcas; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_internal_upscaled_color" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 1, 0, 0, L"rw_upscaled_output" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 2, 0, 0, L"cbFSR1" } + }; + } + + device.linear_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_LINEAR); +} + +FSR1Effect::~FSR1Effect() { + RD::get_singleton()->free_rid(device.linear_clamp_sampler); + + for (uint32_t i = 0; i < FFX_FSR1_PASS_COUNT; i++) { + device.passes[i].shader->version_free(device.passes[i].shader_version); + } +} + +FSR1Context *FSR1Effect::create_context(Size2i p_internal_size, Size2i p_target_size, RD::DataFormat p_output_format) { + FSR1Context *context = memnew(RendererRD::FSR1Context); + context->fsr_desc.flags = FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR1_ENABLE_RCAS; + context->fsr_desc.maxRenderSize.width = p_internal_size.x; + context->fsr_desc.maxRenderSize.height = p_internal_size.y; + context->fsr_desc.displaySize.width = p_target_size.x; + context->fsr_desc.displaySize.height = p_target_size.y; + context->fsr_desc.outputFormat = FFXCommon::rd_format_to_ffx_surface_format(p_output_format); + + FFXCommon::create_ffx_interface(&context->fsr_desc.backendInterface, &context->scratch, &device); + FfxErrorCode result = ffxFsr1ContextCreate(&context->fsr_context, &context->fsr_desc); + if (result == FFX_OK) { + return context; + } else { + memdelete(context); + return nullptr; + } +} + +void FSR1Effect::process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) { + FSR1Context *fsr1_context = p_render_buffers->get_fsr1_context(); + + Size2i internal_size = p_render_buffers->get_internal_size(); + float fsr_upscale_sharpness = p_render_buffers->get_fsr_sharpness(); + + FfxFsr1DispatchDescription dispatch_desc = {}; + dispatch_desc.commandList = nullptr; + dispatch_desc.color = FFXCommon::get_resource_rd(&p_source_rd_texture, L"color"); 
+ dispatch_desc.output = FFXCommon::get_resource_rd(&p_destination_texture, L"output"); + dispatch_desc.renderSize.width = internal_size.width; + dispatch_desc.renderSize.height = internal_size.height; + dispatch_desc.enableSharpening = (fsr_upscale_sharpness > 1e-6f); + dispatch_desc.sharpness = fsr_upscale_sharpness; + + FfxErrorCode result = ffxFsr1ContextDispatch(&fsr1_context->fsr_context, &dispatch_desc); + ERR_FAIL_COND(result != FFX_OK); +} diff --git a/servers/rendering/renderer_rd/effects/fsr.h b/servers/rendering/renderer_rd/effects/ffx/fsr1.h similarity index 65% rename from servers/rendering/renderer_rd/effects/fsr.h rename to servers/rendering/renderer_rd/effects/ffx/fsr1.h index 7f308be203d3..3e1fc32f00a9 100644 --- a/servers/rendering/renderer_rd/effects/fsr.h +++ b/servers/rendering/renderer_rd/effects/ffx/fsr1.h @@ -1,5 +1,5 @@ /**************************************************************************/ -/* fsr.h */ +/* fsr1.h */ /**************************************************************************/ /* This file is part of: */ /* GODOT ENGINE */ @@ -30,47 +30,42 @@ #pragma once -#include "spatial_upscaler.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl.gen.h" -#include "../storage_rd/render_scene_buffers_rd.h" -#include "servers/rendering/renderer_rd/pipeline_deferred_rd.h" -#include "servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl.gen.h" +#include "ffx_common.h" +#include "servers/rendering/renderer_rd/effects/spatial_upscaler.h" +#include "servers/rendering/rendering_server.h" -namespace RendererRD { +#include "thirdparty/amd-ffx/ffx_fsr1.h" -class FSR : public SpatialUpscaler { +namespace RendererRD { +class FSR1Context { public: - FSR(); - ~FSR(); + FFXCommon::Scratch scratch; + FfxFsr1Context fsr_context; + FfxFsr1ContextDescription fsr_desc; - virtual const Span get_label() const final { 
return "FSR 1.0 Upscale"; } - virtual void ensure_context(Ref p_render_buffers) final {} - virtual void process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) final; + ~FSR1Context(); +}; -private: - enum FSRShaderVariant { - FSR_SHADER_VARIANT_NORMAL, - FSR_SHADER_VARIANT_FALLBACK, - }; +class FSR1Effect : public SpatialUpscaler { +public: + FSR1Effect(); + ~FSR1Effect() override; - enum FSRUpscalePass { - FSR_UPSCALE_PASS_EASU = 0, - FSR_UPSCALE_PASS_RCAS = 1 - }; + const Span get_label() const final { return "FSR 1.2 Upscale"; } + void ensure_context(Ref p_render_buffers) final; + void process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) final; - struct FSRUpscalePushConstant { - float resolution_width; - float resolution_height; - float upscaled_width; - float upscaled_height; - float sharpness; - int pass; - int _unused0, _unused1; - }; + FSR1Context *create_context(Size2i p_internal_size, Size2i p_target_size, RD::DataFormat p_output_format); - FsrUpscaleShaderRD fsr_shader; - RID shader_version; - PipelineDeferredRD pipeline; -}; +private: + struct { + Fsr1EasuPassShaderRD easu; + Fsr1RcasPassShaderRD rcas; + } shaders; -} // namespace RendererRD + FFXCommon::Device device; +}; +} //namespace RendererRD diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr2.cpp b/servers/rendering/renderer_rd/effects/ffx/fsr2.cpp new file mode 100644 index 000000000000..19390f4fea74 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ffx/fsr2.cpp @@ -0,0 +1,383 @@ +/**************************************************************************/ +/* fsr2.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). 
*/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "fsr2.h" + +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" +#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h" + +using namespace RendererRD; + +FSR2Context::~FSR2Context() { + ffxFsr2ContextDestroy(&fsr_context); +} + +FSR2Effect::FSR2Effect() { + FfxDeviceCapabilities capabilities = FFXCommon::get_device_capabilities(); + + String general_defines = + "\n#define FFX_GPU\n" + "\n#define FFX_GLSL 1\n" + "\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n" + "\n#define FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n" + "\n#define FFX_FSR2_OPTION_INVERTED_DEPTH 1\n" + "\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n" + "\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n"; + + Vector modes_single; + modes_single.push_back(""); + + Vector modes_with_fp16; + modes_with_fp16.push_back(""); + modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + + // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and + // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL + // files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names. + // + // It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these + // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an + // error if the bindings do not match. + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP]; + pass.shader = &shaders.depth_clip; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_reconstructed_previous_nearest_depth" }, + FfxResourceBinding{ 1, 0, 0, L"r_dilated_motion_vectors" }, + FfxResourceBinding{ 2, 0, 0, L"r_dilatedDepth" }, + FfxResourceBinding{ 3, 0, 0, L"r_reactive_mask" }, + FfxResourceBinding{ 4, 0, 0, L"r_transparency_and_composition_mask" }, + // Godot render graph forces one resource to serve only one usage so we have to remove this binding + // FfxResourceBinding{ 5, 0, 0, L"r_prepared_input_color" }, + FfxResourceBinding{ 6, 0, 0, L"r_previous_dilated_motion_vectors" }, + FfxResourceBinding{ 7, 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 8, 0, 0, L"r_input_color_jittered" }, + FfxResourceBinding{ 9, 0, 0, L"r_input_depth" }, + FfxResourceBinding{ 10, 0, 0, L"r_input_exposure" } + }; + + pass.storage_texture_bindings = { + // FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything. + FfxResourceBinding{ 12, 0, 0, L"rw_dilated_reactive_masks" }, + FfxResourceBinding{ 13, 0, 0, L"rw_prepared_input_color" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 14, 0, 0, L"cbFSR2" } + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH]; + pass.shader = &shaders.reconstruct_previous_depth; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 1, 0, 0, L"r_input_depth" }, + FfxResourceBinding{ 2, 0, 0, L"r_input_color_jittered" }, + FfxResourceBinding{ 3, 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 4, 0, 0, L"r_luma_history" } + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 5, 0, 0, L"rw_reconstructed_previous_nearest_depth" }, + FfxResourceBinding{ 6, 0, 0, L"rw_dilated_motion_vectors" }, + FfxResourceBinding{ 7, 0, 0, L"rw_dilatedDepth" }, + FfxResourceBinding{ 8, 0, 0, L"rw_prepared_input_color" }, + FfxResourceBinding{ 9, 0, 0, L"rw_luma_history" }, + // FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything. + FfxResourceBinding{ 11, 0, 0, L"rw_lock_input_luma" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 12, 0, 0, L"cbFSR2" } + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_LOCK]; + pass.shader = &shaders.lock; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_lock_input_luma" } + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 1, 0, 0, L"rw_new_locks" }, + FfxResourceBinding{ 2, 0, 0, L"rw_reconstructed_previous_nearest_depth" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, 0, L"cbFSR2" } + }; + } + + { + Vector accumulate_modes_with_fp16; + accumulate_modes_with_fp16.push_back("\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); + + // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. 
+ const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA"; + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE]; + pass.shader = &shaders.accumulate; + pass.shader->initialize(accumulate_modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, 0, L"r_dilated_reactive_masks" }, + FfxResourceBinding{ 2, 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 3, 0, 0, L"r_internal_upscaled_color" }, + FfxResourceBinding{ 4, 0, 0, L"r_lock_status" }, + FfxResourceBinding{ 5, 0, 0, L"r_input_depth" }, + FfxResourceBinding{ 6, 0, 0, L"r_prepared_input_color" }, + // FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything. + FfxResourceBinding{ 8, 0, 0, L"r_lanczos_lut" }, + FfxResourceBinding{ 9, 0, 0, L"r_upsample_maximum_bias_lut" }, + FfxResourceBinding{ 10, 0, 0, L"r_imgMips" }, + FfxResourceBinding{ 11, 0, 0, L"r_auto_exposure" }, + FfxResourceBinding{ 12, 0, 0, L"r_luma_history" } + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 13, 0, 0, L"rw_internal_upscaled_color" }, + FfxResourceBinding{ 14, 0, 0, L"rw_lock_status" }, + FfxResourceBinding{ 15, 0, 0, L"rw_upscaled_output" }, + FfxResourceBinding{ 16, 0, 0, L"rw_new_locks" }, + FfxResourceBinding{ 17, 0, 0, L"rw_luma_history" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 18, 0, 0, L"cbFSR2" } + }; + + // Sharpen pass is a clone of the accumulate pass with the sharpening variant. 
+ FFXCommon::Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN]; + sharpen_pass = pass; + sharpen_pass.shader_variant = pass.shader_variant + 1; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_RCAS]; + pass.shader = &shaders.rcas; + pass.shader->initialize(modes_single, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, 0, L"r_rcas_input" } + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 2, 0, 0, L"rw_upscaled_output" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, 0, L"cbFSR2" }, + FfxResourceBinding{ 4, 0, 0, L"cbRCAS" } + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID]; + pass.shader = &shaders.compute_luminance_pyramid; + pass.shader->initialize(modes_single, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_color_jittered" } + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 1, 0, 0, L"rw_spd_global_atomic" }, + FfxResourceBinding{ 2, 0, 0, L"rw_img_mip_shading_change" }, + FfxResourceBinding{ 3, 0, 0, L"rw_img_mip_5" }, + FfxResourceBinding{ 4, 0, 0, L"rw_auto_exposure" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 5, 0, 0, L"cbFSR2" }, + FfxResourceBinding{ 6, 0, 0, L"cbSPD" } + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE]; + pass.shader = &shaders.autogen_reactive; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_opaque_only" }, + FfxResourceBinding{ 1, 0, 0, L"r_input_color_jittered" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 2, 0, 0, L"rw_output_autoreactive" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, 0, L"cbGenerateReactive" }, + FfxResourceBinding{ 4, 0, 0, L"cbFSR2" } + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE]; + pass.shader = &shaders.tcr_autogen; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_opaque_only" }, + FfxResourceBinding{ 1, 0, 0, L"r_input_color_jittered" }, + FfxResourceBinding{ 2, 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 3, 0, 0, L"r_input_prev_color_pre_alpha" }, + FfxResourceBinding{ 4, 0, 0, L"r_input_prev_color_post_alpha" }, + FfxResourceBinding{ 5, 0, 0, L"r_reactive_mask" }, + FfxResourceBinding{ 6, 0, 0, L"r_transparency_and_composition_mask" }, + FfxResourceBinding{ 13, 0, 0, L"r_input_depth" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 7, 0, 0, L"rw_output_autoreactive" }, + FfxResourceBinding{ 8, 0, 0, L"rw_output_autocomposition" }, + FfxResourceBinding{ 9, 0, 0, L"rw_output_prev_color_pre_alpha" }, + FfxResourceBinding{ 10, 0, 0, L"rw_output_prev_color_post_alpha" } + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 11, 0, 0, L"cbFSR2" }, + FfxResourceBinding{ 12, 0, 0, L"cbGenerateReactive" } + }; + } + + device.linear_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_LINEAR); + device.point_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_NEAREST); +} + +FSR2Effect::~FSR2Effect() { + RD::get_singleton()->free_rid(device.point_clamp_sampler); + 
RD::get_singleton()->free_rid(device.linear_clamp_sampler); + + /* NOTE(review): FFX_FSR2_PASS_ACCUMULATE_SHARPEN is assigned as a copy of the accumulate pass (same shader and shader_version), so this loop frees that version twice - confirm version_free() tolerates an already-freed version. */ + for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) { + device.passes[i].shader->version_free(device.passes[i].shader_version); + } +} + +/* Allocates an FSR2Context for the given internal (render) and target (display) sizes, with the HDR and inverted-depth flags set. */ +/* Returns nullptr when ffxFsr2ContextCreate() does not return FFX_OK. p_autogen_reactive is not read here; upscale() enables the auto-reactive path whenever Parameters::opaque_only is a valid RID. */ +FSR2Context *FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive) { + FSR2Context *context = memnew(RendererRD::FSR2Context); + context->fsr_desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR2_ENABLE_DEPTH_INVERTED; + context->fsr_desc.maxRenderSize.width = p_internal_size.x; + context->fsr_desc.maxRenderSize.height = p_internal_size.y; + context->fsr_desc.displaySize.width = p_target_size.x; + context->fsr_desc.displaySize.height = p_target_size.y; + + FFXCommon::create_ffx_interface(&context->fsr_desc.backendInterface, &context->scratch, &device); + FfxErrorCode result = ffxFsr2ContextCreate(&context->fsr_context, &context->fsr_desc); + if (result == FFX_OK) { + return context; + } else { + /* NOTE(review): deleting the context presumably runs ~FSR2Context(), which calls ffxFsr2ContextDestroy() on a context whose creation just failed - confirm that is safe. */ + memdelete(context); + return nullptr; + } +} + +/* Fills an FfxFsr2DispatchDescription from p_params and dispatches one FSR2 upscale on p_params.context; reports through ERR_FAIL_COND when the dispatch does not return FFX_OK. */ +void FSR2Effect::upscale(const Parameters &p_params) { + FfxFsr2DispatchDescription dispatch_desc = {}; + RID color = p_params.color; + RID depth = p_params.depth; + RID velocity = p_params.velocity; + RID reactive = p_params.reactive; + RID exposure = p_params.exposure; + RID output = p_params.output; + dispatch_desc.commandList = nullptr; + dispatch_desc.color = FFXCommon::get_resource_rd(&color, L"color"); + dispatch_desc.depth = FFXCommon::get_resource_rd(&depth, L"depth"); + dispatch_desc.motionVectors = FFXCommon::get_resource_rd(&velocity, L"velocity"); + + // Optional pass of auto-generating reactive masks from opaque-only color. + // This may reduce flickering in scenarios where there are massive transparent objects. 
+ RID opaque_only = p_params.opaque_only; + bool autogen_masks = opaque_only.is_valid(); + + dispatch_desc.enableAutoReactive = autogen_masks; + if (autogen_masks) { + dispatch_desc.autoTcThreshold = .2f; + dispatch_desc.autoTcScale = 1.0f; + dispatch_desc.autoReactiveScale = 1.0f; + dispatch_desc.autoReactiveMax = 0.9f; + dispatch_desc.colorOpaqueOnly = FFXCommon::get_resource_rd(&opaque_only, L"opaque_only"); + /* The caller-provided reactive texture is not passed in this mode. */ + dispatch_desc.reactive = {}; + } else { + dispatch_desc.reactive = FFXCommon::get_resource_rd(&reactive, L"reactive"); + } + + dispatch_desc.exposure = FFXCommon::get_resource_rd(&exposure, L"exposure"); + dispatch_desc.transparencyAndComposition = {}; + dispatch_desc.output = FFXCommon::get_resource_rd(&output, L"output"); + dispatch_desc.jitterOffset.x = p_params.jitter.x; + dispatch_desc.jitterOffset.y = p_params.jitter.y; + dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width); + dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height); + dispatch_desc.reset = p_params.reset_accumulation; + dispatch_desc.renderSize.width = p_params.internal_size.width; + dispatch_desc.renderSize.height = p_params.internal_size.height; + /* A sharpness at or below 1e-6 turns sharpening off. */ + dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f); + dispatch_desc.sharpness = p_params.sharpness; + dispatch_desc.frameTimeDelta = p_params.delta_time; + dispatch_desc.preExposure = 1.0f; + dispatch_desc.cameraNear = p_params.z_near; + dispatch_desc.cameraFar = p_params.z_far; + dispatch_desc.cameraFovAngleVertical = p_params.fovy; + dispatch_desc.viewSpaceToMetersFactor = 1.0f; + + MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix); + + FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc); + ERR_FAIL_COND(result != FFX_OK); +} diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr2.h b/servers/rendering/renderer_rd/effects/ffx/fsr2.h new file mode 100644 index 000000000000..986403cf253b --- /dev/null +++ 
b/servers/rendering/renderer_rd/effects/ffx/fsr2.h @@ -0,0 +1,97 @@ +/**************************************************************************/ +/* fsr2.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl.gen.h" +#include "servers/rendering/rendering_server.h" + +#include "ffx_common.h" +#include "thirdparty/amd-ffx/ffx_fsr2.h" + +namespace RendererRD { +class FSR2Context { +public: + FFXCommon::Scratch scratch; + FfxFsr2Context fsr_context; + FfxFsr2ContextDescription fsr_desc; + + ~FSR2Context(); +}; + +class FSR2Effect { +public: + struct Parameters { + FSR2Context *context; + Size2i internal_size; + RID color; + RID depth; + RID velocity; + RID reactive; + RID opaque_only; + RID exposure; + RID output; + float z_near = 0.0f; + float z_far = 0.0f; + float fovy = 0.0f; + Vector2 jitter; + float delta_time = 0.0f; + float sharpness = 0.0f; + bool reset_accumulation = false; + Projection reprojection; + }; + + FSR2Effect(); + ~FSR2Effect(); + FSR2Context *create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive); + void upscale(const Parameters &p_params); + +private: + struct { + Fsr2DepthClipPassShaderRD depth_clip; + Fsr2ReconstructPreviousDepthPassShaderRD reconstruct_previous_depth; + Fsr2LockPassShaderRD lock; + Fsr2AccumulatePassShaderRD 
accumulate; + Fsr2RcasPassShaderRD rcas; + Fsr2ComputeLuminancePyramidPassShaderRD compute_luminance_pyramid; + Fsr2AutogenReactivePassShaderRD autogen_reactive; + Fsr2TcrAutogenPassShaderRD tcr_autogen; + } shaders; + + FFXCommon::Device device; +}; +} //namespace RendererRD diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.cpp b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.cpp new file mode 100644 index 000000000000..ca0470ee6dc4 --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.cpp @@ -0,0 +1,519 @@ +/**************************************************************************/ +/* fsr3_upscaler.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "fsr3_upscaler.h" + +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" +#include "servers/rendering/renderer_rd/uniform_set_cache_rd.h" + +using namespace RendererRD; + +/* Message callback installed as fsr_desc.fpMessage by FSR3UpscalerEffect::create_context(). */ +/* In DEV_ENABLED builds, errors are forwarded to ERR_PRINT and warnings to WARN_PRINT; in other builds every message is ignored. */ +static void fsr3_recv_message(FfxMsgType type, const wchar_t *message) { +#ifdef DEV_ENABLED + switch (type) { + case FFX_MESSAGE_TYPE_ERROR: + ERR_PRINT(message); + break; + case FFX_MESSAGE_TYPE_WARNING: + WARN_PRINT(message); + break; + default: + /* Any other message type is deliberately ignored; the explicit branch keeps the switch exhaustive. */ + break; + } +#endif +} + +/* Frees the generated reactive mask when one is valid, destroys the three backend resources held by the context (reconstructed previous nearest depth, dilated depth, dilated motion vectors), then destroys the FSR3 upscaler context itself. */ +/* NOTE(review): the resource and context destruction is unconditional - confirm it is safe for a context whose creation failed. */ +FSR3UpscalerContext::~FSR3UpscalerContext() { + if (generated_reactive_mask.is_valid()) { + RD::get_singleton()->free_rid(generated_reactive_mask); + } + + fsr_desc.backendInterface.fpDestroyResource(&fsr_desc.backendInterface, reconstructed_prev_nearest_depth, -1); + fsr_desc.backendInterface.fpDestroyResource(&fsr_desc.backendInterface, dilated_depth, -1); + fsr_desc.backendInterface.fpDestroyResource(&fsr_desc.backendInterface, dilated_motion_vectors, -1); + + ffxFsr3UpscalerContextDestroy(&fsr_context); +} + +FSR3UpscalerEffect::FSR3UpscalerEffect() { + FfxDeviceCapabilities capabilities = FFXCommon::get_device_capabilities(); + + String general_defines = + "\n#define FFX_GPU\n" + "\n#define FFX_GLSL 1\n" + "\n#define FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n" + "\n#define FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT 1\n" + "\n#define FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH 1\n" + "\n#define FFX_FSR3UPSCALER_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n" + "\n#define FFX_FSR3UPSCALER_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n"; + + Vector modes_single; + modes_single.push_back(""); + + Vector modes_with_fp16; + modes_with_fp16.push_back(""); + 
modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + + // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and + // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL + // files included in the FSR3 upscaler and mapping the binding macros (#define *_BIND_*) to their respective implementation names. + // + // It is not guaranteed these will remain consistent between versions of the FSR3 upscaler, so it'll be necessary to keep these + // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an + // error if the bindings do not match. + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS]; + pass.shader = &shaders.prepare_inputs; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 1, 0, 0, L"r_input_depth" }, + FfxResourceBinding{ 2, 0, 0, L"r_input_color_jittered" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 3, 0, 0, L"rw_dilated_motion_vectors" }, + FfxResourceBinding{ 4, 0, 0, L"rw_dilated_depth" }, + FfxResourceBinding{ 5, 0, 0, L"rw_reconstructed_previous_nearest_depth" }, + FfxResourceBinding{ 6, 0, 0, L"rw_farthest_depth" }, + FfxResourceBinding{ 7, 0, 0, L"rw_current_luma" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 8, 0, 0, L"cbFSR3Upscaler" } + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID]; + pass.shader = &shaders.luma_pyramid; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_current_luma" }, + FfxResourceBinding{ 1, 0, 0, L"r_farthest_depth" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 2, 0, 0, L"rw_spd_global_atomic" }, + FfxResourceBinding{ 3, 0, 0, L"rw_frame_info" }, + FfxResourceBinding{ 4, 0, 0, L"rw_spd_mip0" }, + FfxResourceBinding{ 5, 0, 0, L"rw_spd_mip1" }, + FfxResourceBinding{ 6, 0, 0, L"rw_spd_mip2" }, + FfxResourceBinding{ 7, 0, 0, L"rw_spd_mip3" }, + FfxResourceBinding{ 8, 0, 0, L"rw_spd_mip4" }, + FfxResourceBinding{ 9, 0, 0, L"rw_spd_mip5" }, + FfxResourceBinding{ 10, 0, 0, L"rw_farthest_depth_mip1" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 11, 0, 0, L"cbFSR3Upscaler" }, + FfxResourceBinding{ 12, 0, 0, L"cbSPD" }, + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID]; + pass.shader = &shaders.shading_change_pyramid; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_current_luma" }, + FfxResourceBinding{ 1, 0, 0, L"r_previous_luma" }, + FfxResourceBinding{ 2, 0, 0, L"r_dilated_motion_vectors" }, + FfxResourceBinding{ 3, 0, 0, L"r_input_exposure" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 4, 0, 0, L"rw_spd_global_atomic" }, + FfxResourceBinding{ 5, 0, 0, L"rw_spd_mip0" }, + FfxResourceBinding{ 6, 0, 0, L"rw_spd_mip1" }, + FfxResourceBinding{ 7, 0, 0, L"rw_spd_mip2" }, + FfxResourceBinding{ 8, 0, 0, L"rw_spd_mip3" }, + FfxResourceBinding{ 9, 0, 0, L"rw_spd_mip4" }, + FfxResourceBinding{ 10, 0, 0, L"rw_spd_mip5" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 11, 0, 0, L"cbFSR3Upscaler" }, + FfxResourceBinding{ 12, 0, 0, L"cbSPD" }, + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_SHADING_CHANGE]; + pass.shader = &shaders.shading_change; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_spd_mips" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 1, 0, 0, L"rw_shading_change" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 2, 0, 0, L"cbFSR3Upscaler" }, + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY]; + pass.shader = &shaders.prepare_reactivity; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_reconstructed_previous_nearest_depth" }, + FfxResourceBinding{ 1, 0, 0, L"r_dilated_motion_vectors" }, + FfxResourceBinding{ 2, 0, 0, L"r_dilated_depth" }, + FfxResourceBinding{ 3, 0, 0, L"r_reactive_mask" }, + FfxResourceBinding{ 4, 0, 0, L"r_transparency_and_composition_mask" }, + FfxResourceBinding{ 5, 0, 0, L"r_accumulation" }, + FfxResourceBinding{ 6, 0, 0, L"r_shading_change" }, + FfxResourceBinding{ 7, 0, 0, L"r_current_luma" }, + FfxResourceBinding{ 8, 0, 0, L"r_input_exposure" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 9, 0, 0, L"rw_dilated_reactive_masks" }, + FfxResourceBinding{ 10, 0, 0, L"rw_new_locks" }, + FfxResourceBinding{ 11, 0, 0, L"rw_accumulation" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 12, 0, 0, L"cbFSR3Upscaler" }, + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY]; + pass.shader = &shaders.luma_instability; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, 0, L"r_dilated_reactive_masks" }, + FfxResourceBinding{ 2, 0, 0, L"r_dilated_motion_vectors" }, + FfxResourceBinding{ 3, 0, 0, L"r_frame_info" }, + FfxResourceBinding{ 4, 0, 0, L"r_luma_history" }, + FfxResourceBinding{ 5, 0, 0, L"r_farthest_depth_mip1" }, + FfxResourceBinding{ 6, 0, 0, L"r_current_luma" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 7, 0, 0, L"rw_luma_history" }, + FfxResourceBinding{ 8, 0, 0, L"rw_luma_instability" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 9, 0, 0, L"cbFSR3Upscaler" }, + }; + } + + { + /* Variant order: 0 = default, 1 = sharpening, 2 = FP16, 3 = FP16 + sharpening. The pass setup relies on it: the base variant is 0 or 2 and the sharpen pass uses base + 1. */ + /* The sharpening option carries the FFX_FSR3UPSCALER_OPTION_ prefix like every other option defined for these shaders. */ + Vector accumulate_modes_with_fp16; + accumulate_modes_with_fp16.push_back("\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING 1\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING 1\n"); + + // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. + const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA"; + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_ACCUMULATE]; + pass.shader = &shaders.accumulate; + pass.shader->initialize(accumulate_modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 
2 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, 0, L"r_dilated_reactive_masks" }, + FfxResourceBinding{ 2, 0, 0, L"r_input_motion_vectors" }, + FfxResourceBinding{ 3, 0, 0, L"r_internal_upscaled_color" }, + FfxResourceBinding{ 4, 0, 0, L"r_lanczos_lut" }, + FfxResourceBinding{ 5, 0, 0, L"r_farthest_depth_mip1" }, + FfxResourceBinding{ 6, 0, 0, L"r_current_luma" }, + FfxResourceBinding{ 7, 0, 0, L"r_luma_instability" }, + FfxResourceBinding{ 8, 0, 0, L"r_input_color_jittered" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 9, 0, 0, L"rw_internal_upscaled_color" }, + FfxResourceBinding{ 10, 0, 0, L"rw_upscaled_output" }, + FfxResourceBinding{ 11, 0, 0, L"rw_new_locks" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 12, 0, 0, L"cbFSR3Upscaler" }, + }; + + // Sharpen pass is a clone of the accumulate pass with the sharpening variant. + FFXCommon::Pass &sharpen_pass = device.passes[FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN]; + sharpen_pass = pass; + sharpen_pass.shader_variant = pass.shader_variant + 1; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_RCAS]; + pass.shader = &shaders.rcas; + pass.shader->initialize(modes_single, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_exposure" }, + FfxResourceBinding{ 1, 0, 0, L"r_rcas_input" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 2, 0, 0, L"rw_upscaled_output" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 3, 0, 0, L"cbFSR3Upscaler" }, + FfxResourceBinding{ 4, 0, 0, L"cbRCAS" }, + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_DEBUG_VIEW]; + pass.shader = &shaders.debug_view; + pass.shader->initialize(modes_single, general_defines); + pass.shader_version = pass.shader->version_create(); + + pass.sampled_texture_bindings = { + 
FfxResourceBinding{ 0, 0, 0, L"r_dilated_reactive_masks" }, + FfxResourceBinding{ 1, 0, 0, L"r_dilated_motion_vectors" }, + FfxResourceBinding{ 2, 0, 0, L"r_dilated_depth" }, + FfxResourceBinding{ 3, 0, 0, L"r_internal_upscaled_color" }, + FfxResourceBinding{ 4, 0, 0, L"r_input_exposure" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 5, 0, 0, L"rw_upscaled_output" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 6, 0, 0, L"cbFSR3Upscaler" }, + }; + } + + { + FFXCommon::Pass &pass = device.passes[FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE]; + pass.shader = &shaders.autogen_reactive; + pass.shader->initialize(modes_with_fp16, general_defines); + pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; + + pass.sampled_texture_bindings = { + FfxResourceBinding{ 0, 0, 0, L"r_input_opaque_only" }, + FfxResourceBinding{ 1, 0, 0, L"r_input_color_jittered" }, + }; + + pass.storage_texture_bindings = { + FfxResourceBinding{ 2, 0, 0, L"rw_output_autoreactive" }, + // The binding below is present in the GLSL source, but the FSR3 C++ side doesn't register it at all, + // so it must stay commented out to avoid runtime errors. 
+ // FfxResourceBinding{ 3, 0, 0, L"rw_output_autocomposition" }, + }; + + pass.uniform_bindings = { + FfxResourceBinding{ 4, 0, 0, L"cbFSR3Upscaler" }, + FfxResourceBinding{ 5, 0, 0, L"cbGenerateReactive" }, + }; + } + + device.linear_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_LINEAR); + device.point_clamp_sampler = FFXCommon::create_clamp_sampler(RD::SAMPLER_FILTER_NEAREST); +} + +FSR3UpscalerEffect::~FSR3UpscalerEffect() { + RD::get_singleton()->free_rid(device.point_clamp_sampler); + RD::get_singleton()->free_rid(device.linear_clamp_sampler); + + for (uint32_t i = 0; i < FFX_FSR3UPSCALER_PASS_COUNT; i++) { + if (i == FFX_FSR3UPSCALER_PASS_TCR_AUTOGENERATE) { + // These passes are not even created, so no need to be freed + continue; + } + + device.passes[i].shader->version_free(device.passes[i].shader_version); + } +} + +FSR3UpscalerContext *FSR3UpscalerEffect::create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive) { + FSR3UpscalerContext *context = memnew(RendererRD::FSR3UpscalerContext); + context->fsr_desc.flags = FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED; +#ifdef DEV_ENABLED + context->fsr_desc.flags |= FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING; +#endif + context->fsr_desc.maxRenderSize.width = p_internal_size.x; + context->fsr_desc.maxRenderSize.height = p_internal_size.y; + context->fsr_desc.maxUpscaleSize.width = p_target_size.x; + context->fsr_desc.maxUpscaleSize.height = p_target_size.y; + context->fsr_desc.fpMessage = fsr3_recv_message; + + FFXCommon::create_ffx_interface(&context->fsr_desc.backendInterface, &context->scratch, &device); + FfxErrorCode result = ffxFsr3UpscalerContextCreate(&context->fsr_context, &context->fsr_desc); + if (result == FFX_OK) { + FfxFsr3UpscalerSharedResourceDescriptions shared_resource_descriptions; + ffxFsr3UpscalerGetSharedResourceDescriptions(&context->fsr_context, &shared_resource_descriptions); + + // Create shared resources 
+ result = context->fsr_desc.backendInterface.fpCreateResource(&context->fsr_desc.backendInterface, &shared_resource_descriptions.reconstructedPrevNearestDepth, -1, &context->reconstructed_prev_nearest_depth); + if (result != FFX_OK) { + ERR_PRINT("Failed to create FSR3 Upscaler shared resource: reconstructed_prev_nearest_depth."); + memdelete(context); + return nullptr; + } + + result = context->fsr_desc.backendInterface.fpCreateResource(&context->fsr_desc.backendInterface, &shared_resource_descriptions.dilatedDepth, -1, &context->dilated_depth); + if (result != FFX_OK) { + ERR_PRINT("Failed to create FSR3 Upscaler shared resource: reconstructed_prev_nearest_depth."); + memdelete(context); + return nullptr; + } + + result = context->fsr_desc.backendInterface.fpCreateResource(&context->fsr_desc.backendInterface, &shared_resource_descriptions.dilatedMotionVectors, -1, &context->dilated_motion_vectors); + if (result != FFX_OK) { + ERR_PRINT("Failed to create FSR3 Upscaler shared resource: reconstructed_prev_nearest_depth."); + memdelete(context); + return nullptr; + } + + if (p_autogen_reactive) { + RD::TextureFormat texture_format; + texture_format.texture_type = RD::TEXTURE_TYPE_2D; + texture_format.format = RD::DATA_FORMAT_R8_UNORM; + texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + texture_format.width = p_internal_size.width; + texture_format.height = p_internal_size.height; + texture_format.depth = 1; + texture_format.mipmaps = 1; + + context->generated_reactive_mask = RD::get_singleton()->texture_create(texture_format, RD::TextureView()); + ERR_FAIL_COND_V_MSG(context->generated_reactive_mask.is_null(), nullptr, "Failed to create FSR3 Upscaler generated reactive mask texture."); + RD::get_singleton()->set_resource_name(context->generated_reactive_mask, L"FSR3UPSCALER_GeneratedReactiveMask"); + } + + return context; + } else { + 
memdelete(context); + return nullptr; + } +} + +void FSR3UpscalerEffect::upscale(const Parameters &p_params) { + RID color = p_params.color; + RID depth = p_params.depth; + RID velocity = p_params.velocity; + RID reactive = p_params.reactive; + RID exposure = p_params.exposure; + RID output = p_params.output; + + FFXCommon::Scratch &scratch = p_params.context->scratch; + + RID opaque_only = p_params.opaque_only; + RID out_reactive = p_params.context->generated_reactive_mask; + bool autogen_masks = opaque_only.is_valid() && p_params.context->generated_reactive_mask.is_valid(); + + // Optional pass of auto-generating reactive masks from opaque-only color. + // This may reduce flickering in scenarios where there are massive transparent objects. + if (autogen_masks) { + FfxFsr3UpscalerGenerateReactiveDescription generate_desc = {}; + + generate_desc.commandList = nullptr; + generate_desc.colorPreUpscale = FFXCommon::get_resource_rd(&color, L"color"); + generate_desc.colorOpaqueOnly = FFXCommon::get_resource_rd(&opaque_only, L"opaque_only"); + generate_desc.outReactive = FFXCommon::get_resource_rd(&out_reactive, L"generated_reactive_mask"); + generate_desc.binaryValue = 0.9f; + generate_desc.renderSize.width = p_params.internal_size.width; + generate_desc.renderSize.height = p_params.internal_size.height; + generate_desc.cutoffThreshold = 0.2f; + generate_desc.scale = 1.f; + + FfxErrorCode err = ffxFsr3UpscalerContextGenerateReactiveMask(&p_params.context->fsr_context, &generate_desc); + if (err != FFX_OK) { + WARN_PRINT_ONCE("FSR3: Generate reactive mask enabled, but corresponding pass failed."); + autogen_masks = false; + } + } + + FfxFsr3UpscalerDispatchDescription dispatch_desc = {}; + RID reconstructed_prev_nearest_depth = scratch.resources.rids[p_params.context->reconstructed_prev_nearest_depth.internalIndex]; + RID dilated_depth = scratch.resources.rids[p_params.context->dilated_depth.internalIndex]; + RID dilated_motion_vectors = 
scratch.resources.rids[p_params.context->dilated_motion_vectors.internalIndex]; + + dispatch_desc.commandList = nullptr; + dispatch_desc.color = FFXCommon::get_resource_rd(&color, L"color"); + dispatch_desc.depth = FFXCommon::get_resource_rd(&depth, L"depth"); + dispatch_desc.reconstructedPrevNearestDepth = FFXCommon::get_resource_rd(&reconstructed_prev_nearest_depth, L"reconstructed_prev_nearest_depth"); + dispatch_desc.dilatedDepth = FFXCommon::get_resource_rd(&dilated_depth, L"dilated_depth"); + dispatch_desc.dilatedMotionVectors = FFXCommon::get_resource_rd(&dilated_motion_vectors, L"dilated_motion_vectors"); + dispatch_desc.motionVectors = FFXCommon::get_resource_rd(&velocity, L"velocity"); + dispatch_desc.reactive = FFXCommon::get_resource_rd(autogen_masks ? &out_reactive : &reactive, L"reactive"); + dispatch_desc.exposure = FFXCommon::get_resource_rd(&exposure, L"exposure"); + dispatch_desc.transparencyAndComposition = {}; + dispatch_desc.output = FFXCommon::get_resource_rd(&output, L"output"); + dispatch_desc.jitterOffset.x = p_params.jitter.x; + dispatch_desc.jitterOffset.y = p_params.jitter.y; + dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width); + dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height); + dispatch_desc.reset = p_params.reset_accumulation; + dispatch_desc.renderSize.width = p_params.internal_size.width; + dispatch_desc.renderSize.height = p_params.internal_size.height; + dispatch_desc.upscaleSize.width = p_params.target_size.width; + dispatch_desc.upscaleSize.height = p_params.target_size.height; + dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f); + dispatch_desc.sharpness = p_params.sharpness; + dispatch_desc.frameTimeDelta = p_params.delta_time; + dispatch_desc.preExposure = 1.0f; + dispatch_desc.cameraNear = p_params.z_near; + dispatch_desc.cameraFar = p_params.z_far; + dispatch_desc.cameraFovAngleVertical = p_params.fovy; + dispatch_desc.viewSpaceToMetersFactor = 1.0f; + + 
MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix); + + FfxErrorCode result = ffxFsr3UpscalerContextDispatch(&p_params.context->fsr_context, &dispatch_desc); + ERR_FAIL_COND(result != FFX_OK); +} diff --git a/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h new file mode 100644 index 000000000000..311af4ee1a0f --- /dev/null +++ b/servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h @@ -0,0 +1,110 @@ +/**************************************************************************/ +/* fsr3_upscaler.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl.gen.h" + +#include "ffx_common.h" +#include "servers/rendering/rendering_server.h" + +#include "thirdparty/amd-ffx/ffx_fsr3upscaler.h" + +namespace RendererRD { +class FSR3UpscalerContext { +public: + FFXCommon::Scratch scratch; + FfxFsr3UpscalerContext fsr_context; + FfxFsr3UpscalerContextDescription fsr_desc; + + // Output resources from FSR3 Upscaler that are required for frame generation + FfxResourceInternal 
reconstructed_prev_nearest_depth; + FfxResourceInternal dilated_depth; + FfxResourceInternal dilated_motion_vectors; + // Only if autogen reactive is used + RID generated_reactive_mask = RID(); + + ~FSR3UpscalerContext(); +}; + +class FSR3UpscalerEffect { +public: + struct Parameters { + FSR3UpscalerContext *context; + Size2i internal_size; + Size2i target_size; + RID color; + RID depth; + RID velocity; + RID reactive; + RID opaque_only; + RID exposure; + RID output; + float z_near = 0.0f; + float z_far = 0.0f; + float fovy = 0.0f; + Vector2 jitter; + float delta_time = 0.0f; + float sharpness = 0.0f; + bool reset_accumulation = false; + Projection reprojection; + }; + + FSR3UpscalerEffect(); + ~FSR3UpscalerEffect(); + FSR3UpscalerContext *create_context(Size2i p_internal_size, Size2i p_target_size, bool p_autogen_reactive); + void upscale(const Parameters &p_params); + +private: + struct { + Fsr3UpscalerPrepareInputsPassShaderRD prepare_inputs; + Fsr3UpscalerLumaPyramidPassShaderRD luma_pyramid; + Fsr3UpscalerShadingChangePyramidPassShaderRD shading_change_pyramid; + Fsr3UpscalerShadingChangePassShaderRD shading_change; + Fsr3UpscalerPrepareReactivityPassShaderRD prepare_reactivity; + Fsr3UpscalerLumaInstabilityPassShaderRD luma_instability; + Fsr3UpscalerAccumulatePassShaderRD accumulate; + Fsr3UpscalerRcasPassShaderRD rcas; + Fsr3UpscalerDebugViewPassShaderRD debug_view; + Fsr3UpscalerAutogenReactivePassShaderRD autogen_reactive; + } shaders; + + FFXCommon::Device device; +}; +} //namespace RendererRD diff --git a/servers/rendering/renderer_rd/effects/fsr.cpp b/servers/rendering/renderer_rd/effects/fsr.cpp deleted file mode 100644 index 1c0b5f7c14b3..000000000000 --- a/servers/rendering/renderer_rd/effects/fsr.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/**************************************************************************/ -/* fsr.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ 
-/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/**************************************************************************/ - -#include "fsr.h" -#include "../storage_rd/material_storage.h" -#include "../uniform_set_cache_rd.h" - -using namespace RendererRD; - -FSR::FSR() { - Vector fsr_upscale_modes; - fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_NORMAL\n"); - fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_FALLBACK\n"); - fsr_shader.initialize(fsr_upscale_modes); - - FSRShaderVariant variant; - if (RD::get_singleton()->has_feature(RD::SUPPORTS_HALF_FLOAT)) { - variant = FSR_SHADER_VARIANT_NORMAL; - } else { - variant = FSR_SHADER_VARIANT_FALLBACK; - } - - shader_version = fsr_shader.version_create(); - pipeline.create_compute_pipeline(fsr_shader.version_get_shader(shader_version, variant)); -} - -FSR::~FSR() { - pipeline.free(); - fsr_shader.version_free(shader_version); -} - -void FSR::process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) { - UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); - ERR_FAIL_NULL(uniform_set_cache); - MaterialStorage *material_storage = MaterialStorage::get_singleton(); - ERR_FAIL_NULL(material_storage); - - Size2i internal_size = p_render_buffers->get_internal_size(); - Size2i target_size = p_render_buffers->get_target_size(); - float fsr_upscale_sharpness = p_render_buffers->get_fsr_sharpness(); - - if (!p_render_buffers->has_texture(SNAME("FSR"), SNAME("upscale_texture"))) { - RD::DataFormat format = p_render_buffers->get_base_data_format(); - uint32_t usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - uint32_t layers = 1; // we only need one layer, in multiview we're processing one layer at a time. 
- - p_render_buffers->create_texture(SNAME("FSR"), SNAME("upscale_texture"), format, usage_bits, RD::TEXTURE_SAMPLES_1, target_size, layers); - } - - RID upscale_texture = p_render_buffers->get_texture(SNAME("FSR"), SNAME("upscale_texture")); - - FSRUpscalePushConstant push_constant; - memset(&push_constant, 0, sizeof(FSRUpscalePushConstant)); - - int dispatch_x = (target_size.x + 15) / 16; - int dispatch_y = (target_size.y + 15) / 16; - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, pipeline.get_rid()); - - push_constant.resolution_width = internal_size.width; - push_constant.resolution_height = internal_size.height; - push_constant.upscaled_width = target_size.width; - push_constant.upscaled_height = target_size.height; - push_constant.sharpness = fsr_upscale_sharpness; - - RID shader = fsr_shader.version_get_shader(shader_version, 0); - ERR_FAIL_COND(shader.is_null()); - - RID default_sampler = material_storage->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_LINEAR, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED); - - //FSR Easc - RD::Uniform u_source_rd_texture(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, { default_sampler, p_source_rd_texture }); - RD::Uniform u_upscale_texture(RD::UNIFORM_TYPE_IMAGE, 0, { upscale_texture }); - - push_constant.pass = FSR_UPSCALE_PASS_EASU; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_source_rd_texture), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_upscale_texture), 1); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(FSRUpscalePushConstant)); - - RD::get_singleton()->compute_list_dispatch(compute_list, dispatch_x, dispatch_y, 1); - RD::get_singleton()->compute_list_add_barrier(compute_list); - - //FSR Rcas - RD::Uniform 
u_upscale_texture_with_sampler(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 0, { default_sampler, upscale_texture }); - RD::Uniform u_destination_texture(RD::UNIFORM_TYPE_IMAGE, 0, { p_destination_texture }); - - push_constant.pass = FSR_UPSCALE_PASS_RCAS; - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_upscale_texture_with_sampler), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 1, u_destination_texture), 1); - - RD::get_singleton()->compute_list_set_push_constant(compute_list, &push_constant, sizeof(FSRUpscalePushConstant)); - - RD::get_singleton()->compute_list_dispatch(compute_list, dispatch_x, dispatch_y, 1); - - RD::get_singleton()->compute_list_end(); -} diff --git a/servers/rendering/renderer_rd/effects/fsr2.cpp b/servers/rendering/renderer_rd/effects/fsr2.cpp deleted file mode 100644 index a0a8d8a5b369..000000000000 --- a/servers/rendering/renderer_rd/effects/fsr2.cpp +++ /dev/null @@ -1,880 +0,0 @@ -/**************************************************************************/ -/* fsr2.cpp */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/**************************************************************************/ - -#include "fsr2.h" - -#include "../storage_rd/material_storage.h" -#include "../uniform_set_cache_rd.h" - -using namespace RendererRD; - -#ifndef _MSC_VER -#include -#define wcscpy_s wcscpy -#endif - -static RD::TextureType ffx_resource_type_to_rd_texture_type(FfxResourceType p_type) { - switch (p_type) { - case FFX_RESOURCE_TYPE_TEXTURE1D: - return RD::TEXTURE_TYPE_1D; - case FFX_RESOURCE_TYPE_TEXTURE2D: - return RD::TEXTURE_TYPE_2D; - case FFX_RESOURCE_TYPE_TEXTURE3D: - return RD::TEXTURE_TYPE_3D; - default: - return RD::TEXTURE_TYPE_MAX; - } -} - -static FfxResourceType rd_texture_type_to_ffx_resource_type(RD::TextureType p_type) { - switch (p_type) { - case RD::TEXTURE_TYPE_1D: - return FFX_RESOURCE_TYPE_TEXTURE1D; - case RD::TEXTURE_TYPE_2D: - return FFX_RESOURCE_TYPE_TEXTURE2D; - case RD::TEXTURE_TYPE_3D: - return FFX_RESOURCE_TYPE_TEXTURE3D; - default: - return FFX_RESOURCE_TYPE_BUFFER; - } -} - -static RD::DataFormat ffx_surface_format_to_rd_format(FfxSurfaceFormat p_format) { - switch (p_format) { - case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS: - return RD::DATA_FORMAT_R32G32B32A32_SFLOAT; - case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT: - return RD::DATA_FORMAT_R32G32B32A32_SFLOAT; - case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT: - return RD::DATA_FORMAT_R16G16B16A16_SFLOAT; - case FFX_SURFACE_FORMAT_R16G16B16A16_UNORM: - return RD::DATA_FORMAT_R16G16B16A16_UNORM; - case FFX_SURFACE_FORMAT_R32G32_FLOAT: - return RD::DATA_FORMAT_R32G32_SFLOAT; - case FFX_SURFACE_FORMAT_R32_UINT: - return RD::DATA_FORMAT_R32_UINT; - case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS: - return RD::DATA_FORMAT_R8G8B8A8_UNORM; - case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM: - return RD::DATA_FORMAT_R8G8B8A8_UNORM; - case FFX_SURFACE_FORMAT_R11G11B10_FLOAT: - return RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32; - case FFX_SURFACE_FORMAT_R16G16_FLOAT: - return RD::DATA_FORMAT_R16G16_SFLOAT; - case 
FFX_SURFACE_FORMAT_R16G16_UINT: - return RD::DATA_FORMAT_R16G16_UINT; - case FFX_SURFACE_FORMAT_R16_FLOAT: - return RD::DATA_FORMAT_R16_SFLOAT; - case FFX_SURFACE_FORMAT_R16_UINT: - return RD::DATA_FORMAT_R16_UINT; - case FFX_SURFACE_FORMAT_R16_UNORM: - return RD::DATA_FORMAT_R16_UNORM; - case FFX_SURFACE_FORMAT_R16_SNORM: - return RD::DATA_FORMAT_R16_SNORM; - case FFX_SURFACE_FORMAT_R8_UNORM: - return RD::DATA_FORMAT_R8_UNORM; - case FFX_SURFACE_FORMAT_R8_UINT: - return RD::DATA_FORMAT_R8_UINT; - case FFX_SURFACE_FORMAT_R8G8_UNORM: - return RD::DATA_FORMAT_R8G8_UNORM; - case FFX_SURFACE_FORMAT_R32_FLOAT: - return RD::DATA_FORMAT_R32_SFLOAT; - default: - return RD::DATA_FORMAT_MAX; - } -} - -static FfxSurfaceFormat rd_format_to_ffx_surface_format(RD::DataFormat p_format) { - switch (p_format) { - case RD::DATA_FORMAT_R32G32B32A32_SFLOAT: - return FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT; - case RD::DATA_FORMAT_R16G16B16A16_SFLOAT: - return FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT; - case RD::DATA_FORMAT_R16G16B16A16_UNORM: - return FFX_SURFACE_FORMAT_R16G16B16A16_UNORM; - case RD::DATA_FORMAT_R32G32_SFLOAT: - return FFX_SURFACE_FORMAT_R32G32_FLOAT; - case RD::DATA_FORMAT_R32_UINT: - return FFX_SURFACE_FORMAT_R32_UINT; - case RD::DATA_FORMAT_R8G8B8A8_UNORM: - return FFX_SURFACE_FORMAT_R8G8B8A8_UNORM; - case RD::DATA_FORMAT_B10G11R11_UFLOAT_PACK32: - return FFX_SURFACE_FORMAT_R11G11B10_FLOAT; - case RD::DATA_FORMAT_R16G16_SFLOAT: - return FFX_SURFACE_FORMAT_R16G16_FLOAT; - case RD::DATA_FORMAT_R16G16_UINT: - return FFX_SURFACE_FORMAT_R16G16_UINT; - case RD::DATA_FORMAT_R16_SFLOAT: - return FFX_SURFACE_FORMAT_R16_FLOAT; - case RD::DATA_FORMAT_R16_UINT: - return FFX_SURFACE_FORMAT_R16_UINT; - case RD::DATA_FORMAT_R16_UNORM: - return FFX_SURFACE_FORMAT_R16_UNORM; - case RD::DATA_FORMAT_R16_SNORM: - return FFX_SURFACE_FORMAT_R16_SNORM; - case RD::DATA_FORMAT_R8_UNORM: - return FFX_SURFACE_FORMAT_R8_UNORM; - case RD::DATA_FORMAT_R8_UINT: - return FFX_SURFACE_FORMAT_R8_UINT; - 
case RD::DATA_FORMAT_R8G8_UNORM: - return FFX_SURFACE_FORMAT_R8G8_UNORM; - case RD::DATA_FORMAT_R32_SFLOAT: - return FFX_SURFACE_FORMAT_R32_FLOAT; - default: - return FFX_SURFACE_FORMAT_UNKNOWN; - } -} - -static uint32_t ffx_usage_to_rd_usage_flags(uint32_t p_flags) { - uint32_t ret = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT; - - if (p_flags & FFX_RESOURCE_USAGE_RENDERTARGET) { - ret |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; - } - - if (p_flags & FFX_RESOURCE_USAGE_UAV) { - ret |= RD::TEXTURE_USAGE_STORAGE_BIT; - ret |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; - ret |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - } - - return ret; -} - -static FfxErrorCode create_backend_context_rd(FfxFsr2Interface *p_backend_interface, FfxDevice p_device) { - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - - // Store pointer to the device common to all contexts. - scratch.device = p_device; - - // Create a ring buffer of uniform buffers. - // FIXME: This could be optimized to be a single memory block if it was possible for RD to create views into a particular memory range of a UBO. 
- for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) { - scratch.ubo_ring_buffer[i] = RD::get_singleton()->uniform_buffer_create(FFX_MAX_CONST_SIZE * sizeof(uint32_t)); - ERR_FAIL_COND_V(scratch.ubo_ring_buffer[i].is_null(), FFX_ERROR_BACKEND_API_ERROR); - } - - return FFX_OK; -} - -static FfxErrorCode get_device_capabilities_rd(FfxFsr2Interface *p_backend_interface, FfxDeviceCapabilities *p_out_device_capabilities, FfxDevice p_device) { - FSR2Effect::Device &effect_device = *reinterpret_cast(p_device); - - *p_out_device_capabilities = effect_device.capabilities; - - return FFX_OK; -} - -static FfxErrorCode destroy_backend_context_rd(FfxFsr2Interface *p_backend_interface) { - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - - for (uint32_t i = 0; i < FSR2_UBO_RING_BUFFER_SIZE; i++) { - RD::get_singleton()->free_rid(scratch.ubo_ring_buffer[i]); - } - - return FFX_OK; -} - -static FfxErrorCode create_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxCreateResourceDescription *p_create_resource_description, FfxResourceInternal *p_out_resource) { - // FSR2's base implementation won't issue a call to create a heap type that isn't just default on its own, - // so we can safely ignore it as RD does not expose this concept. - ERR_FAIL_COND_V(p_create_resource_description->heapType != FFX_HEAP_TYPE_DEFAULT, FFX_ERROR_INVALID_ARGUMENT); - - RenderingDevice *rd = RD::get_singleton(); - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - FfxResourceDescription res_desc = p_create_resource_description->resourceDescription; - - // FSR2's base implementation never requests buffer creation. - ERR_FAIL_COND_V(res_desc.type != FFX_RESOURCE_TYPE_TEXTURE1D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE2D && res_desc.type != FFX_RESOURCE_TYPE_TEXTURE3D, FFX_ERROR_INVALID_ARGUMENT); - - if (res_desc.mipCount == 0) { - // Mipmap count must be derived from the resource's dimensions. 
- res_desc.mipCount = uint32_t(1 + std::floor(std::log2(MAX(MAX(res_desc.width, res_desc.height), res_desc.depth)))); - } - - Vector initial_data; - if (p_create_resource_description->initDataSize) { - PackedByteArray byte_array; - byte_array.resize(p_create_resource_description->initDataSize); - memcpy(byte_array.ptrw(), p_create_resource_description->initData, p_create_resource_description->initDataSize); - initial_data.push_back(byte_array); - } - - RD::TextureFormat texture_format; - texture_format.texture_type = ffx_resource_type_to_rd_texture_type(res_desc.type); - texture_format.format = ffx_surface_format_to_rd_format(res_desc.format); - texture_format.usage_bits = ffx_usage_to_rd_usage_flags(p_create_resource_description->usage); - texture_format.width = res_desc.width; - texture_format.height = res_desc.height; - texture_format.depth = res_desc.depth; - texture_format.mipmaps = res_desc.mipCount; - texture_format.is_discardable = true; - - RID texture = rd->texture_create(texture_format, RD::TextureView(), initial_data); - ERR_FAIL_COND_V(texture.is_null(), FFX_ERROR_BACKEND_API_ERROR); - - rd->set_resource_name(texture, String(p_create_resource_description->name)); - - // Add the resource to the storage and use the internal index to reference it. - p_out_resource->internalIndex = scratch.resources.add(texture, false, p_create_resource_description->id, res_desc); - - return FFX_OK; -} - -static FfxErrorCode register_resource_rd(FfxFsr2Interface *p_backend_interface, const FfxResource *p_in_resource, FfxResourceInternal *p_out_resource) { - if (p_in_resource->resource == nullptr) { - // Null resource case. 
- p_out_resource->internalIndex = -1; - return FFX_OK; - } - - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - const RID &rid = *reinterpret_cast(p_in_resource->resource); - ERR_FAIL_COND_V(rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); - - // Add the resource to the storage and use the internal index to reference it. - p_out_resource->internalIndex = scratch.resources.add(rid, true, FSR2Context::RESOURCE_ID_DYNAMIC, p_in_resource->description); - - return FFX_OK; -} - -static FfxErrorCode unregister_resources_rd(FfxFsr2Interface *p_backend_interface) { - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - LocalVector dynamic_list_copy = scratch.resources.dynamic_list; - for (uint32_t i : dynamic_list_copy) { - scratch.resources.remove(i); - } - - return FFX_OK; -} - -static FfxResourceDescription get_resource_description_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) { - if (p_resource.internalIndex != -1) { - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - return scratch.resources.descriptions[p_resource.internalIndex]; - } else { - return {}; - } -} - -static FfxErrorCode destroy_resource_rd(FfxFsr2Interface *p_backend_interface, FfxResourceInternal p_resource) { - if (p_resource.internalIndex != -1) { - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - if (scratch.resources.rids[p_resource.internalIndex].is_valid()) { - RD::get_singleton()->free_rid(scratch.resources.rids[p_resource.internalIndex]); - scratch.resources.remove(p_resource.internalIndex); - } - } - - return FFX_OK; -} - -static FfxErrorCode create_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxFsr2Pass p_pass, const FfxPipelineDescription *p_pipeline_description, FfxPipelineState *p_out_pipeline) { - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - FSR2Effect::Device 
&device = *reinterpret_cast(scratch.device); - FSR2Effect::Pass &effect_pass = device.passes[p_pass]; - - if (effect_pass.pipeline.pipeline_rid.is_null()) { - // Create pipeline for the device if it hasn't been created yet. - effect_pass.root_signature.shader_rid = effect_pass.shader->version_get_shader(effect_pass.shader_version, effect_pass.shader_variant); - ERR_FAIL_COND_V(effect_pass.root_signature.shader_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR); - - effect_pass.pipeline.pipeline_rid = RD::get_singleton()->compute_pipeline_create(effect_pass.root_signature.shader_rid); - ERR_FAIL_COND_V(effect_pass.pipeline.pipeline_rid.is_null(), FFX_ERROR_BACKEND_API_ERROR); - } - - // While this is not their intended use, we use the pipeline and root signature pointers to store the - // RIDs to the pipeline and shader that RD needs for the compute pipeline. - p_out_pipeline->pipeline = reinterpret_cast(&effect_pass.pipeline); - p_out_pipeline->rootSignature = reinterpret_cast(&effect_pass.root_signature); - - p_out_pipeline->srvCount = effect_pass.sampled_bindings.size(); - ERR_FAIL_COND_V(p_out_pipeline->srvCount > FFX_MAX_NUM_SRVS, FFX_ERROR_OUT_OF_RANGE); - memcpy(p_out_pipeline->srvResourceBindings, effect_pass.sampled_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->srvCount); - - p_out_pipeline->uavCount = effect_pass.storage_bindings.size(); - ERR_FAIL_COND_V(p_out_pipeline->uavCount > FFX_MAX_NUM_UAVS, FFX_ERROR_OUT_OF_RANGE); - memcpy(p_out_pipeline->uavResourceBindings, effect_pass.storage_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->uavCount); - - p_out_pipeline->constCount = effect_pass.uniform_bindings.size(); - ERR_FAIL_COND_V(p_out_pipeline->constCount > FFX_MAX_NUM_CONST_BUFFERS, FFX_ERROR_OUT_OF_RANGE); - memcpy(p_out_pipeline->cbResourceBindings, effect_pass.uniform_bindings.ptr(), sizeof(FfxResourceBinding) * p_out_pipeline->constCount); - - bool low_resolution_mvs = (p_pipeline_description->contextFlags & 
FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) == 0; - - if (p_pass == FFX_FSR2_PASS_ACCUMULATE || p_pass == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) { - // Change the binding for motion vectors in this particular pass if low resolution MVs are used. - if (low_resolution_mvs) { - FfxResourceBinding &binding = p_out_pipeline->srvResourceBindings[2]; - wcscpy_s(binding.name, L"r_dilated_motion_vectors"); - } - } - - return FFX_OK; -} - -static FfxErrorCode destroy_pipeline_rd(FfxFsr2Interface *p_backend_interface, FfxPipelineState *p_pipeline) { - // We don't want to destroy pipelines when the FSR2 API deems it necessary as it'll do so whenever the context is destroyed. - - return FFX_OK; -} - -static FfxErrorCode schedule_gpu_job_rd(FfxFsr2Interface *p_backend_interface, const FfxGpuJobDescription *p_job) { - ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT); - ERR_FAIL_NULL_V(p_job, FFX_ERROR_INVALID_ARGUMENT); - - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - scratch.gpu_jobs.push_back(*p_job); - - return FFX_OK; -} - -static FfxErrorCode execute_gpu_job_clear_float_rd(FSR2Context::Scratch &p_scratch, const FfxClearFloatJobDescription &p_job) { - RID resource = p_scratch.resources.rids[p_job.target.internalIndex]; - FfxResourceDescription &desc = p_scratch.resources.descriptions[p_job.target.internalIndex]; - - ERR_FAIL_COND_V(desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); - - Color color(p_job.color[0], p_job.color[1], p_job.color[2], p_job.color[3]); - RD::get_singleton()->texture_clear(resource, color, 0, desc.mipCount, 0, 1); - - return FFX_OK; -} - -static FfxErrorCode execute_gpu_job_copy_rd(FSR2Context::Scratch &p_scratch, const FfxCopyJobDescription &p_job) { - RID src = p_scratch.resources.rids[p_job.src.internalIndex]; - RID dst = p_scratch.resources.rids[p_job.dst.internalIndex]; - FfxResourceDescription &src_desc = p_scratch.resources.descriptions[p_job.src.internalIndex]; - 
FfxResourceDescription &dst_desc = p_scratch.resources.descriptions[p_job.dst.internalIndex]; - - ERR_FAIL_COND_V(src_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); - ERR_FAIL_COND_V(dst_desc.type == FFX_RESOURCE_TYPE_BUFFER, FFX_ERROR_INVALID_ARGUMENT); - - for (uint32_t mip_level = 0; mip_level < src_desc.mipCount; mip_level++) { - RD::get_singleton()->texture_copy(src, dst, Vector3(0, 0, 0), Vector3(0, 0, 0), Vector3(src_desc.width, src_desc.height, src_desc.depth), mip_level, mip_level, 0, 0); - } - - return FFX_OK; -} - -static FfxErrorCode execute_gpu_job_compute_rd(FSR2Context::Scratch &p_scratch, const FfxComputeJobDescription &p_job) { - UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); - ERR_FAIL_NULL_V(uniform_set_cache, FFX_ERROR_BACKEND_API_ERROR); - - FSR2Effect::RootSignature &root_signature = *reinterpret_cast(p_job.pipeline.rootSignature); - ERR_FAIL_COND_V(root_signature.shader_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); - - FSR2Effect::Pipeline &backend_pipeline = *reinterpret_cast(p_job.pipeline.pipeline); - ERR_FAIL_COND_V(backend_pipeline.pipeline_rid.is_null(), FFX_ERROR_INVALID_ARGUMENT); - - thread_local LocalVector compute_uniforms; - compute_uniforms.clear(); - - for (uint32_t i = 0; i < p_job.pipeline.srvCount; i++) { - RID texture_rid = p_scratch.resources.rids[p_job.srvs[i].internalIndex]; - RD::Uniform texture_uniform(RD::UNIFORM_TYPE_TEXTURE, p_job.pipeline.srvResourceBindings[i].slotIndex, texture_rid); - compute_uniforms.push_back(texture_uniform); - } - - for (uint32_t i = 0; i < p_job.pipeline.uavCount; i++) { - RID image_rid = p_scratch.resources.rids[p_job.uavs[i].internalIndex]; - RD::Uniform storage_uniform; - storage_uniform.uniform_type = RD::UNIFORM_TYPE_IMAGE; - storage_uniform.binding = p_job.pipeline.uavResourceBindings[i].slotIndex; - - if (p_job.uavMip[i] > 0) { - LocalVector &mip_slice_rids = p_scratch.resources.mip_slice_rids[p_job.uavs[i].internalIndex]; - if 
(mip_slice_rids.is_empty()) { - mip_slice_rids.resize(p_scratch.resources.descriptions[p_job.uavs[i].internalIndex].mipCount); - } - - ERR_FAIL_COND_V(p_job.uavMip[i] >= mip_slice_rids.size(), FFX_ERROR_INVALID_ARGUMENT); - - if (mip_slice_rids[p_job.uavMip[i]].is_null()) { - mip_slice_rids[p_job.uavMip[i]] = RD::get_singleton()->texture_create_shared_from_slice(RD::TextureView(), image_rid, 0, p_job.uavMip[i]); - } - - ERR_FAIL_COND_V(mip_slice_rids[p_job.uavMip[i]].is_null(), FFX_ERROR_BACKEND_API_ERROR); - - storage_uniform.append_id(mip_slice_rids[p_job.uavMip[i]]); - } else { - storage_uniform.append_id(image_rid); - } - - compute_uniforms.push_back(storage_uniform); - } - - for (uint32_t i = 0; i < p_job.pipeline.constCount; i++) { - RID buffer_rid = p_scratch.ubo_ring_buffer[p_scratch.ubo_ring_buffer_index]; - p_scratch.ubo_ring_buffer_index = (p_scratch.ubo_ring_buffer_index + 1) % FSR2_UBO_RING_BUFFER_SIZE; - - RD::get_singleton()->buffer_update(buffer_rid, 0, p_job.cbs[i].uint32Size * sizeof(uint32_t), p_job.cbs[i].data); - - RD::Uniform buffer_uniform(RD::UNIFORM_TYPE_UNIFORM_BUFFER, p_job.pipeline.cbResourceBindings[i].slotIndex, buffer_rid); - compute_uniforms.push_back(buffer_uniform); - } - - FSR2Effect::Device &device = *reinterpret_cast(p_scratch.device); - RD::Uniform u_point_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 0, device.point_clamp_sampler); - RD::Uniform u_linear_clamp_sampler(RD::UniformType::UNIFORM_TYPE_SAMPLER, 1, device.linear_clamp_sampler); - - RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); - RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, backend_pipeline.pipeline_rid); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(root_signature.shader_rid, 0, u_point_clamp_sampler, u_linear_clamp_sampler), 0); - RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache_vec(root_signature.shader_rid, 
1, compute_uniforms), 1); - RD::get_singleton()->compute_list_dispatch(compute_list, p_job.dimensions[0], p_job.dimensions[1], p_job.dimensions[2]); - RD::get_singleton()->compute_list_end(); - - return FFX_OK; -} - -static FfxErrorCode execute_gpu_jobs_rd(FfxFsr2Interface *p_backend_interface, FfxCommandList p_command_list) { - ERR_FAIL_NULL_V(p_backend_interface, FFX_ERROR_INVALID_ARGUMENT); - - FSR2Context::Scratch &scratch = *reinterpret_cast(p_backend_interface->scratchBuffer); - FfxErrorCode error_code = FFX_OK; - for (const FfxGpuJobDescription &job : scratch.gpu_jobs) { - switch (job.jobType) { - case FFX_GPU_JOB_CLEAR_FLOAT: { - error_code = execute_gpu_job_clear_float_rd(scratch, job.clearJobDescriptor); - } break; - case FFX_GPU_JOB_COPY: { - error_code = execute_gpu_job_copy_rd(scratch, job.copyJobDescriptor); - } break; - case FFX_GPU_JOB_COMPUTE: { - error_code = execute_gpu_job_compute_rd(scratch, job.computeJobDescriptor); - } break; - default: { - error_code = FFX_ERROR_INVALID_ARGUMENT; - } break; - } - - if (error_code != FFX_OK) { - scratch.gpu_jobs.clear(); - return error_code; - } - } - - scratch.gpu_jobs.clear(); - - return FFX_OK; -} - -static FfxResource get_resource_rd(RID *p_rid, const wchar_t *p_name) { - FfxResource res = {}; - if (p_rid->is_null()) { - return res; - } - - wcscpy_s(res.name, p_name); - - RD::TextureFormat texture_format = RD::get_singleton()->texture_get_format(*p_rid); - res.description.type = rd_texture_type_to_ffx_resource_type(texture_format.texture_type); - res.description.format = rd_format_to_ffx_surface_format(texture_format.format); - res.description.width = texture_format.width; - res.description.height = texture_format.height; - res.description.depth = texture_format.depth; - res.description.mipCount = texture_format.mipmaps; - res.description.flags = FFX_RESOURCE_FLAGS_NONE; - res.resource = reinterpret_cast(p_rid); - res.isDepth = texture_format.usage_bits & RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; - 
- return res; -} - -FSR2Context::~FSR2Context() { - ffxFsr2ContextDestroy(&fsr_context); -} - -FSR2Effect::FSR2Effect() { - FfxDeviceCapabilities &capabilities = device.capabilities; - capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1; - capabilities.waveLaneCountMin = 32; - capabilities.waveLaneCountMax = 32; - capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_HALF_FLOAT); - capabilities.raytracingSupported = false; - - String general_defines = - "\n#define FFX_GPU\n" - "\n#define FFX_GLSL 1\n" - "\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n" - "\n#define FFX_FSR2_OPTION_HDR_COLOR_INPUT 1\n" - "\n#define FFX_FSR2_OPTION_INVERTED_DEPTH 1\n" - "\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n" - "\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n"; - - Vector modes_single; - modes_single.push_back(""); - - Vector modes_with_fp16; - modes_with_fp16.push_back(""); - modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); - - // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and - // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL - // files included in FSR2 and mapping the macro bindings (#define FSR2_BIND_*) to their respective implementation names. - // - // It is not guaranteed these will remain consistent at all between versions of FSR2, so it'll be necessary to keep these - // bindings up to date whenever the library is updated. In such cases, it is very likely the validation layer will throw an - // error if the bindings do not match. - - { - Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP]; - pass.shader = &shaders.depth_clip; - pass.shader->initialize(modes_with_fp16, general_defines); - pass.shader_version = pass.shader->version_create(); - pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" }, - FfxResourceBinding{ 1, 0, L"r_dilated_motion_vectors" }, - FfxResourceBinding{ 2, 0, L"r_dilatedDepth" }, - FfxResourceBinding{ 3, 0, L"r_reactive_mask" }, - FfxResourceBinding{ 4, 0, L"r_transparency_and_composition_mask" }, - FfxResourceBinding{ 6, 0, L"r_previous_dilated_motion_vectors" }, - FfxResourceBinding{ 7, 0, L"r_input_motion_vectors" }, - FfxResourceBinding{ 8, 0, L"r_input_color_jittered" }, - FfxResourceBinding{ 9, 0, L"r_input_depth" }, - FfxResourceBinding{ 10, 0, L"r_input_exposure" } - }; - - pass.storage_bindings = { - // FSR2_BIND_UAV_DEPTH_CLIP (11) does not point to anything. - FfxResourceBinding{ 12, 0, L"rw_dilated_reactive_masks" }, - FfxResourceBinding{ 13, 0, L"rw_prepared_input_color" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 14, 0, L"cbFSR2" } - }; - } - - { - Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH]; - pass.shader = &shaders.reconstruct_previous_depth; - pass.shader->initialize(modes_with_fp16, general_defines); - pass.shader_version = pass.shader->version_create(); - pass.shader_variant = capabilities.fp16Supported ? 1 : 0; - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" }, - FfxResourceBinding{ 1, 0, L"r_input_depth" }, - FfxResourceBinding{ 2, 0, L"r_input_color_jittered" }, - FfxResourceBinding{ 3, 0, L"r_input_exposure" }, - FfxResourceBinding{ 4, 0, L"r_luma_history" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 5, 0, L"rw_reconstructed_previous_nearest_depth" }, - FfxResourceBinding{ 6, 0, L"rw_dilated_motion_vectors" }, - FfxResourceBinding{ 7, 0, L"rw_dilatedDepth" }, - FfxResourceBinding{ 8, 0, L"rw_prepared_input_color" }, - FfxResourceBinding{ 9, 0, L"rw_luma_history" }, - // FSR2_BIND_UAV_LUMA_INSTABILITY (10) does not point to anything. 
- FfxResourceBinding{ 11, 0, L"rw_lock_input_luma" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 12, 0, L"cbFSR2" } - }; - } - - { - Pass &pass = device.passes[FFX_FSR2_PASS_LOCK]; - pass.shader = &shaders.lock; - pass.shader->initialize(modes_with_fp16, general_defines); - pass.shader_version = pass.shader->version_create(); - pass.shader_variant = capabilities.fp16Supported ? 1 : 0; - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_lock_input_luma" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 1, 0, L"rw_new_locks" }, - FfxResourceBinding{ 2, 0, L"rw_reconstructed_previous_nearest_depth" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 3, 0, L"cbFSR2" } - }; - } - - { - Vector accumulate_modes_with_fp16; - accumulate_modes_with_fp16.push_back("\n"); - accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); - accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); - accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); - - // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. - const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA"; - Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE]; - pass.shader = &shaders.accumulate; - pass.shader->initialize(accumulate_modes_with_fp16, general_defines); - pass.shader_version = pass.shader->version_create(); - pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 
2 : 0; - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_input_exposure" }, - FfxResourceBinding{ 1, 0, L"r_dilated_reactive_masks" }, - FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" }, - FfxResourceBinding{ 3, 0, L"r_internal_upscaled_color" }, - FfxResourceBinding{ 4, 0, L"r_lock_status" }, - FfxResourceBinding{ 5, 0, L"r_input_depth" }, - FfxResourceBinding{ 6, 0, L"r_prepared_input_color" }, - // FSR2_BIND_SRV_LUMA_INSTABILITY(7) does not point to anything. - FfxResourceBinding{ 8, 0, L"r_lanczos_lut" }, - FfxResourceBinding{ 9, 0, L"r_upsample_maximum_bias_lut" }, - FfxResourceBinding{ 10, 0, L"r_imgMips" }, - FfxResourceBinding{ 11, 0, L"r_auto_exposure" }, - FfxResourceBinding{ 12, 0, L"r_luma_history" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 13, 0, L"rw_internal_upscaled_color" }, - FfxResourceBinding{ 14, 0, L"rw_lock_status" }, - FfxResourceBinding{ 15, 0, L"rw_upscaled_output" }, - FfxResourceBinding{ 16, 0, L"rw_new_locks" }, - FfxResourceBinding{ 17, 0, L"rw_luma_history" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 18, 0, L"cbFSR2" } - }; - - // Sharpen pass is a clone of the accumulate pass with the sharpening variant. 
- Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN]; - sharpen_pass = pass; - sharpen_pass.shader_variant = pass.shader_variant + 1; - } - - { - Pass &pass = device.passes[FFX_FSR2_PASS_RCAS]; - pass.shader = &shaders.rcas; - pass.shader->initialize(modes_single, general_defines); - pass.shader_version = pass.shader->version_create(); - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_input_exposure" }, - FfxResourceBinding{ 1, 0, L"r_rcas_input" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 2, 0, L"rw_upscaled_output" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 3, 0, L"cbFSR2" }, - FfxResourceBinding{ 4, 0, L"cbRCAS" } - }; - } - - { - Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID]; - pass.shader = &shaders.compute_luminance_pyramid; - pass.shader->initialize(modes_single, general_defines); - pass.shader_version = pass.shader->version_create(); - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_input_color_jittered" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 1, 0, L"rw_spd_global_atomic" }, - FfxResourceBinding{ 2, 0, L"rw_img_mip_shading_change" }, - FfxResourceBinding{ 3, 0, L"rw_img_mip_5" }, - FfxResourceBinding{ 4, 0, L"rw_auto_exposure" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 5, 0, L"cbFSR2" }, - FfxResourceBinding{ 6, 0, L"cbSPD" } - }; - } - - { - Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE]; - pass.shader = &shaders.autogen_reactive; - pass.shader->initialize(modes_with_fp16, general_defines); - pass.shader_version = pass.shader->version_create(); - pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_input_opaque_only" }, - FfxResourceBinding{ 1, 0, L"r_input_color_jittered" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 2, 0, L"rw_output_autoreactive" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 3, 0, L"cbGenerateReactive" }, - FfxResourceBinding{ 4, 0, L"cbFSR2" } - }; - } - - { - Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE]; - pass.shader = &shaders.tcr_autogen; - pass.shader->initialize(modes_with_fp16, general_defines); - pass.shader_version = pass.shader->version_create(); - pass.shader_variant = capabilities.fp16Supported ? 1 : 0; - - pass.sampled_bindings = { - FfxResourceBinding{ 0, 0, L"r_input_opaque_only" }, - FfxResourceBinding{ 1, 0, L"r_input_color_jittered" }, - FfxResourceBinding{ 2, 0, L"r_input_motion_vectors" }, - FfxResourceBinding{ 3, 0, L"r_input_prev_color_pre_alpha" }, - FfxResourceBinding{ 4, 0, L"r_input_prev_color_post_alpha" }, - FfxResourceBinding{ 5, 0, L"r_reactive_mask" }, - FfxResourceBinding{ 6, 0, L"r_transparency_and_composition_mask" }, - FfxResourceBinding{ 13, 0, L"r_input_depth" } - }; - - pass.storage_bindings = { - FfxResourceBinding{ 7, 0, L"rw_output_autoreactive" }, - FfxResourceBinding{ 8, 0, L"rw_output_autocomposition" }, - FfxResourceBinding{ 9, 0, L"rw_output_prev_color_pre_alpha" }, - FfxResourceBinding{ 10, 0, L"rw_output_prev_color_post_alpha" } - }; - - pass.uniform_bindings = { - FfxResourceBinding{ 11, 0, L"cbFSR2" }, - FfxResourceBinding{ 12, 0, L"cbGenerateReactive" } - }; - } - - RD::SamplerState state; - state.mag_filter = RD::SAMPLER_FILTER_NEAREST; - state.min_filter = RD::SAMPLER_FILTER_NEAREST; - state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; - state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; - state.repeat_w = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE; - state.min_lod = -1000.0f; - state.max_lod = 1000.0f; - state.anisotropy_max = 1.0; - device.point_clamp_sampler = 
RD::get_singleton()->sampler_create(state); - ERR_FAIL_COND(device.point_clamp_sampler.is_null()); - - state.mag_filter = RD::SAMPLER_FILTER_LINEAR; - state.min_filter = RD::SAMPLER_FILTER_LINEAR; - device.linear_clamp_sampler = RD::get_singleton()->sampler_create(state); - ERR_FAIL_COND(device.linear_clamp_sampler.is_null()); -} - -FSR2Effect::~FSR2Effect() { - RD::get_singleton()->free_rid(device.point_clamp_sampler); - RD::get_singleton()->free_rid(device.linear_clamp_sampler); - - for (uint32_t i = 0; i < FFX_FSR2_PASS_COUNT; i++) { - device.passes[i].shader->version_free(device.passes[i].shader_version); - } -} - -FSR2Context *FSR2Effect::create_context(Size2i p_internal_size, Size2i p_target_size) { - FSR2Context *context = memnew(RendererRD::FSR2Context); - context->fsr_desc.flags = FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE | FFX_FSR2_ENABLE_DEPTH_INVERTED; - context->fsr_desc.maxRenderSize.width = p_internal_size.x; - context->fsr_desc.maxRenderSize.height = p_internal_size.y; - context->fsr_desc.displaySize.width = p_target_size.x; - context->fsr_desc.displaySize.height = p_target_size.y; - context->fsr_desc.device = &device; - - FfxFsr2Interface &functions = context->fsr_desc.callbacks; - functions.fpCreateBackendContext = create_backend_context_rd; - functions.fpGetDeviceCapabilities = get_device_capabilities_rd; - functions.fpDestroyBackendContext = destroy_backend_context_rd; - functions.fpCreateResource = create_resource_rd; - functions.fpRegisterResource = register_resource_rd; - functions.fpUnregisterResources = unregister_resources_rd; - functions.fpGetResourceDescription = get_resource_description_rd; - functions.fpDestroyResource = destroy_resource_rd; - functions.fpCreatePipeline = create_pipeline_rd; - functions.fpDestroyPipeline = destroy_pipeline_rd; - functions.fpScheduleGpuJob = schedule_gpu_job_rd; - functions.fpExecuteGpuJobs = execute_gpu_jobs_rd; - functions.scratchBuffer = &context->scratch; - functions.scratchBufferSize = 
sizeof(context->scratch); - - FfxErrorCode result = ffxFsr2ContextCreate(&context->fsr_context, &context->fsr_desc); - if (result == FFX_OK) { - return context; - } else { - memdelete(context); - return nullptr; - } -} - -void FSR2Effect::upscale(const Parameters &p_params) { - // TODO: Transparency & Composition mask is not implemented. - FfxFsr2DispatchDescription dispatch_desc = {}; - RID color = p_params.color; - RID depth = p_params.depth; - RID velocity = p_params.velocity; - RID reactive = p_params.reactive; - RID exposure = p_params.exposure; - RID output = p_params.output; - dispatch_desc.commandList = nullptr; - dispatch_desc.color = get_resource_rd(&color, L"color"); - dispatch_desc.depth = get_resource_rd(&depth, L"depth"); - dispatch_desc.motionVectors = get_resource_rd(&velocity, L"velocity"); - dispatch_desc.reactive = get_resource_rd(&reactive, L"reactive"); - dispatch_desc.exposure = get_resource_rd(&exposure, L"exposure"); - dispatch_desc.transparencyAndComposition = {}; - dispatch_desc.output = get_resource_rd(&output, L"output"); - dispatch_desc.colorOpaqueOnly = {}; - dispatch_desc.jitterOffset.x = p_params.jitter.x; - dispatch_desc.jitterOffset.y = p_params.jitter.y; - dispatch_desc.motionVectorScale.x = float(p_params.internal_size.width); - dispatch_desc.motionVectorScale.y = float(p_params.internal_size.height); - dispatch_desc.reset = p_params.reset_accumulation; - dispatch_desc.renderSize.width = p_params.internal_size.width; - dispatch_desc.renderSize.height = p_params.internal_size.height; - dispatch_desc.enableSharpening = (p_params.sharpness > 1e-6f); - dispatch_desc.sharpness = p_params.sharpness; - dispatch_desc.frameTimeDelta = p_params.delta_time; - dispatch_desc.preExposure = 1.0f; - dispatch_desc.cameraNear = p_params.z_near; - dispatch_desc.cameraFar = p_params.z_far; - dispatch_desc.cameraFovAngleVertical = p_params.fovy; - dispatch_desc.viewSpaceToMetersFactor = 1.0f; - dispatch_desc.enableAutoReactive = false; - 
dispatch_desc.autoTcThreshold = 1.0f; - dispatch_desc.autoTcScale = 1.0f; - dispatch_desc.autoReactiveScale = 1.0f; - dispatch_desc.autoReactiveMax = 1.0f; - - RendererRD::MaterialStorage::store_camera(p_params.reprojection, dispatch_desc.reprojectionMatrix); - - FfxErrorCode result = ffxFsr2ContextDispatch(&p_params.context->fsr_context, &dispatch_desc); - ERR_FAIL_COND(result != FFX_OK); -} diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 10f96eeb060f..4224f388d2bf 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -84,7 +84,13 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_fsr2(RendererRD::FSR2Effect *p_effect) { if (fsr2_context == nullptr) { - fsr2_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size()); + fsr2_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size(), render_buffers->get_fsr_auto_generate_reactive()); + } +} + +void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_fsr3_upscaler(RendererRD::FSR3UpscalerEffect *p_effect) { + if (fsr3_upscaler_context == nullptr) { + fsr3_upscaler_context = p_effect->create_context(render_buffers->get_internal_size(), render_buffers->get_target_size(), render_buffers->get_fsr_auto_generate_reactive()); } } @@ -127,6 +133,11 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::free_data() { fsr2_context = nullptr; } + if (fsr3_upscaler_context) { + memdelete(fsr3_upscaler_context); + fsr3_upscaler_context = nullptr; + } + #ifdef METAL_MFXTEMPORAL_ENABLED if (mfx_temporal_context) { memdelete(mfx_temporal_context); @@ -1738,6 +1749,7 @@ 
void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co enum { SCALE_NONE, SCALE_FSR2, + SCALE_FSR3, SCALE_MFX, } scale_type = SCALE_NONE; @@ -1745,6 +1757,9 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co case RS::VIEWPORT_SCALING_3D_MODE_FSR2: scale_type = SCALE_FSR2; break; + case RS::VIEWPORT_SCALING_3D_MODE_FSR3: + scale_type = SCALE_FSR3; + break; case RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL: #ifdef METAL_MFXTEMPORAL_ENABLED scale_type = SCALE_MFX; @@ -2359,6 +2374,15 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co _process_compositor_effects(RS::COMPOSITOR_EFFECT_CALLBACK_TYPE_PRE_TRANSPARENT, p_render_data); } + // Prepare opaque-only texture for reactive mask generation if needed. + if (rb->get_fsr_auto_generate_reactive()) { + rb->ensure_opaque_only_color_texture(); + RD::get_singleton()->draw_command_begin_label("Copy Opaque-only Color for FSR"); + Size2i copy_size = rb->get_internal_size(); + copy_effects->copy_to_rect(rb->get_internal_texture(), rb->get_opaque_only_color_texture(), Rect2i(0, 0, copy_size.width, copy_size.height)); + RD::get_singleton()->draw_command_end_label(); + } + RENDER_TIMESTAMP("Render 3D Transparent Pass"); RD::get_singleton()->draw_command_begin_label("Render 3D Transparent Pass"); @@ -2434,6 +2458,11 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co params.depth = rb->get_depth_texture(v); params.velocity = rb->get_velocity_buffer(false, v); params.reactive = rb->get_internal_texture_reactive(v); + if (rb->get_fsr_auto_generate_reactive()) { + // Provide opaque only texture for reactive mask generation. 
+ params.opaque_only = rb->get_opaque_only_color_texture(v); + } + params.exposure = exposure; params.output = rb->get_upscaled_texture(v); params.z_near = p_render_data->scene_data->z_near; @@ -2455,6 +2484,57 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co fsr2_effect->upscale(params); } + RD::get_singleton()->draw_command_end_label(); + } else if (scale_type == SCALE_FSR3) { + rb_data->ensure_fsr3_upscaler(fsr3_upscaler_effect); + + RID exposure; + if (RSG::camera_attributes->camera_attributes_uses_auto_exposure(p_render_data->camera_attributes)) { + exposure = luminance->get_current_luminance_buffer(rb); + } + + RD::get_singleton()->draw_command_begin_label("FSR3 Upscaler"); + RENDER_TIMESTAMP("FSR3 Upscaler"); + + for (uint32_t v = 0; v < rb->get_view_count(); v++) { + real_t fov = p_render_data->scene_data->cam_projection.get_fov(); + real_t aspect = p_render_data->scene_data->cam_projection.get_aspect(); + real_t fovy = p_render_data->scene_data->cam_projection.get_fovy(fov, 1.0 / aspect); + Vector2 jitter = p_render_data->scene_data->taa_jitter * Vector2(rb->get_internal_size()) * 0.5f; + RendererRD::FSR3UpscalerEffect::Parameters params; + params.context = rb_data->get_fsr3_upscaler_context(); + params.internal_size = rb->get_internal_size(); + params.sharpness = CLAMP(1.0f - (rb->get_fsr_sharpness() / 2.0f), 0.0f, 1.0f); + params.color = rb->get_internal_texture(v); + params.depth = rb->get_depth_texture(v); + params.velocity = rb->get_velocity_buffer(false, v); + params.reactive = rb->get_internal_texture_reactive(v); + if (rb->get_fsr_auto_generate_reactive()) { + // Provide opaque only texture for reactive mask generation. 
+ params.opaque_only = rb->get_opaque_only_color_texture(v); + } + + params.exposure = exposure; + params.output = rb->get_upscaled_texture(v); + params.z_near = p_render_data->scene_data->z_near; + params.z_far = p_render_data->scene_data->z_far; + params.fovy = fovy; + params.jitter = jitter; + params.delta_time = float(time_step); + params.reset_accumulation = false; // FIXME: The engine does not provide a way to reset the accumulation. + + Projection correction; + correction.set_depth_correction(true, true, false); + + const Projection &prev_proj = p_render_data->scene_data->prev_cam_projection; + const Projection &cur_proj = p_render_data->scene_data->cam_projection; + const Transform3D &prev_transform = p_render_data->scene_data->prev_cam_transform; + const Transform3D &cur_transform = p_render_data->scene_data->cam_transform; + params.reprojection = (correction * prev_proj) * prev_transform.affine_inverse() * cur_transform * (correction * cur_proj).inverse(); + + fsr3_upscaler_effect->upscale(params); + } + RD::get_singleton()->draw_command_end_label(); } else if (scale_type == SCALE_MFX) { #ifdef METAL_MFXTEMPORAL_ENABLED @@ -5094,6 +5174,7 @@ RenderForwardClustered::RenderForwardClustered() { taa = memnew(RendererRD::TAA); fsr2_effect = memnew(RendererRD::FSR2Effect); + fsr3_upscaler_effect = memnew(RendererRD::FSR3UpscalerEffect); ss_effects = memnew(RendererRD::SSEffects); #ifdef METAL_MFXTEMPORAL_ENABLED motion_vectors_store = memnew(RendererRD::MotionVectorsStore); @@ -5117,6 +5198,11 @@ RenderForwardClustered::~RenderForwardClustered() { fsr2_effect = nullptr; } + if (fsr3_upscaler_effect) { + memdelete(fsr3_upscaler_effect); + fsr3_upscaler_effect = nullptr; + } + #ifdef METAL_MFXTEMPORAL_ENABLED if (mfx_temporal_effect) { memdelete(mfx_temporal_effect); diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index a5b3199f3123..1924a8119101 
100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -33,7 +33,8 @@ #include "core/templates/paged_allocator.h" #include "servers/rendering/multi_uma_buffer.h" #include "servers/rendering/renderer_rd/cluster_builder_rd.h" -#include "servers/rendering/renderer_rd/effects/fsr2.h" +#include "servers/rendering/renderer_rd/effects/ffx/fsr2.h" +#include "servers/rendering/renderer_rd/effects/ffx/fsr3_upscaler.h" #ifdef METAL_ENABLED #include "servers/rendering/renderer_rd/effects/metal_fx.h" #endif @@ -95,6 +96,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { private: RenderSceneBuffersRD *render_buffers = nullptr; RendererRD::FSR2Context *fsr2_context = nullptr; + RendererRD::FSR3UpscalerContext *fsr3_upscaler_context = nullptr; #ifdef METAL_MFXTEMPORAL_ENABLED RendererRD::MFXTemporalContext *mfx_temporal_context = nullptr; #endif @@ -142,7 +144,9 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID get_voxelgi_msaa(uint32_t p_layer) { return render_buffers->get_texture_slice(RB_SCOPE_FORWARD_CLUSTERED, RB_TEX_VOXEL_GI_MSAA, p_layer, 0); } void ensure_fsr2(RendererRD::FSR2Effect *p_effect); + void ensure_fsr3_upscaler(RendererRD::FSR3UpscalerEffect *p_effect); RendererRD::FSR2Context *get_fsr2_context() const { return fsr2_context; } + RendererRD::FSR3UpscalerContext *get_fsr3_upscaler_context() const { return fsr3_upscaler_context; } #ifdef METAL_MFXTEMPORAL_ENABLED bool ensure_mfx_temporal(RendererRD::MFXTemporalEffect *p_effect); @@ -730,6 +734,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { RendererRD::TAA *taa = nullptr; RendererRD::FSR2Effect *fsr2_effect = nullptr; + RendererRD::FSR3UpscalerEffect *fsr3_upscaler_effect = nullptr; RendererRD::SSEffects *ss_effects = nullptr; #ifdef METAL_MFXTEMPORAL_ENABLED diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp 
b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 20443d66d292..7e8a86d5328f 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -464,11 +464,11 @@ void RendererSceneRenderRD::_render_buffers_post_process_and_tonemap(const Rende bool can_use_storage = _render_buffers_can_be_storage(); RS::ViewportScaling3DMode scale_mode = rb->get_scaling_3d_mode(); - bool use_upscaled_texture = rb->has_upscaled_texture() && (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2 || scale_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL); + bool use_upscaled_texture = rb->has_upscaled_texture() && (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2 || scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR3 || scale_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL); SpatialUpscaler *spatial_upscaler = nullptr; if (can_use_effects) { - if (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR) { - spatial_upscaler = fsr; + if (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR1) { + spatial_upscaler = fsr1; } else if (scale_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL) { #if METAL_ENABLED spatial_upscaler = mfx_spatial; @@ -1696,7 +1696,7 @@ void RendererSceneRenderRD::init() { vrs = memnew(RendererRD::VRS); } if (can_use_storage) { - fsr = memnew(RendererRD::FSR); + fsr1 = memnew(RendererRD::FSR1Effect); } #ifdef METAL_ENABLED mfx_spatial = memnew(RendererRD::MFXSpatialEffect); @@ -1730,8 +1730,8 @@ RendererSceneRenderRD::~RendererSceneRenderRD() { if (vrs) { memdelete(vrs); } - if (fsr) { - memdelete(fsr); + if (fsr1) { + memdelete(fsr1); } #ifdef METAL_ENABLED if (mfx_spatial) { diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index 831140da357d..40560ee8bcfb 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -34,7 +34,7 @@ 
#include "servers/rendering/renderer_rd/effects/bokeh_dof.h" #include "servers/rendering/renderer_rd/effects/copy_effects.h" #include "servers/rendering/renderer_rd/effects/debug_effects.h" -#include "servers/rendering/renderer_rd/effects/fsr.h" +#include "servers/rendering/renderer_rd/effects/ffx/fsr1.h" #include "servers/rendering/renderer_rd/effects/luminance.h" #ifdef METAL_ENABLED #include "servers/rendering/renderer_rd/effects/metal_fx.h" @@ -65,9 +65,9 @@ class RendererSceneRenderRD : public RendererSceneRender, public RenderingShader RendererRD::Luminance *luminance = nullptr; RendererRD::SMAA *smaa = nullptr; RendererRD::ToneMapper *tone_mapper = nullptr; - RendererRD::FSR *fsr = nullptr; RendererRD::VRS *vrs = nullptr; RendererRD::Resolve *resolve_effects = nullptr; + RendererRD::FSR1Effect *fsr1 = nullptr; #ifdef METAL_ENABLED RendererRD::MFXSpatialEffect *mfx_spatial = nullptr; #endif diff --git a/servers/rendering/renderer_rd/shaders/effects/SCsub b/servers/rendering/renderer_rd/shaders/effects/SCsub index e5517e52eba0..4dd8a4cdc206 100644 --- a/servers/rendering/renderer_rd/shaders/effects/SCsub +++ b/servers/rendering/renderer_rd/shaders/effects/SCsub @@ -17,4 +17,4 @@ if "RD_GLSL" in env["BUILDERS"]: for glsl_file in glsl_files: env.RD_GLSL(glsl_file) -SConscript("fsr2/SCsub") +SConscript("ffx/SCsub") diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/SCsub b/servers/rendering/renderer_rd/shaders/effects/ffx/SCsub new file mode 100644 index 000000000000..4d6ae4ba7481 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/SCsub @@ -0,0 +1,28 @@ +#!/usr/bin/env python +from misc.utility.scons_hints import * + +Import("env") + +if "RD_GLSL" in env["BUILDERS"]: + # find all include files + gl_include_files = ( + [str(f) for f in Glob("*_inc.glsl")] + + [str(f) for f in Glob("../*_inc.glsl")] + + [str(f) for f in Glob("*/*_inc.glsl")] + ) + + # Add all AMD FidelityFX (FSR1, FSR2 and FSR3 upscaler) shader and header files.
+ ffx_dir = "#thirdparty/amd-ffx" + gl_include_files += [str(f) for f in Glob(ffx_dir + "/shaders/*/*.glsl")] + gl_include_files += [str(f) for f in Glob(ffx_dir + "/gpu/*.h")] + gl_include_files += [str(f) for f in Glob(ffx_dir + "/gpu/*/*.h")] + + # find all shader code(all glsl files excluding our include files) + glsl_files = [str(f) for f in Glob("*/*.glsl") if str(f) not in gl_include_files] + + # make sure we recompile shaders if include files change + env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"]) + + # compile shaders + for glsl_file in glsl_files: + env.RD_GLSL(glsl_file) diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl new file mode 100644 index 000000000000..5467a16c6ba7 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_easu_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_easu_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl new file mode 100644 index 000000000000..c3d0d33f983b --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr1/fsr1_rcas_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_rcas_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl new file mode 100644 index 000000000000..8adc90f1ea73 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_accumulate_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include 
"thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_accumulate_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl new file mode 100644 index 000000000000..bf6e1aedd251 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_autogen_reactive_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_autogen_reactive_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl new file mode 100644 index 000000000000..2a21d360d422 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_compute_luminance_pyramid_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl new file mode 100644 index 000000000000..511f1e26ff75 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_depth_clip_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_depth_clip_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl new file mode 100644 index 000000000000..586362f6bedb --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_lock_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 
450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_lock_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl new file mode 100644 index 000000000000..e0134b9ca71c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_rcas_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_rcas_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl new file mode 100644 index 000000000000..370e2ecc9359 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_reconstruct_previous_depth_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_reconstruct_previous_depth_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl new file mode 100644 index 000000000000..09114ac46a84 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr2/fsr2_tcr_autogen_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_tcr_autogen_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl new file mode 100644 index 000000000000..4de99bf9c901 --- /dev/null +++ 
b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_accumulate_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl new file mode 100644 index 000000000000..5ef9e308ba57 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_autogen_reactive_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_autogen_reactive_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl new file mode 100644 index 000000000000..acff62a7a969 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_debug_view_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl new file mode 100644 index 000000000000..33451b183bfd --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_instability_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include 
"thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl new file mode 100644 index 000000000000..44c5479209a8 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_luma_pyramid_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl new file mode 100644 index 000000000000..66260e393fc5 --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_inputs_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl new file mode 100644 index 000000000000..04279dfcb62e --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_prepare_reactivity_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl 
b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl new file mode 100644 index 000000000000..10778593af5d --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_rcas_pass.glsl @@ -0,0 +1,7 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl new file mode 100644 index 000000000000..9321a763340a --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl new file mode 100644 index 000000000000..0b4b31aab2fe --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/effects/ffx/fsr3upscaler/fsr3upscaler_shading_change_pyramid_pass.glsl @@ -0,0 +1,8 @@ +#[compute] + +#version 450 + +#VERSION_DEFINES + +#include "../../motion_vector_inc.glsl" +#include "thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub b/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub deleted file mode 100644 index 53f3ee397752..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/SCsub +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python -from misc.utility.scons_hints import * - -Import("env") - -if 
"RD_GLSL" in env["BUILDERS"]: - # find all include files - gl_include_files = [str(f) for f in Glob("*_inc.glsl")] + [str(f) for f in Glob("../*_inc.glsl")] - - # Add all FSR2 shader and header files. - fsr2_dir = "#thirdparty/amd-fsr2/shaders" - gl_include_files += [str(f) for f in Glob(fsr2_dir + "/*.h")] - gl_include_files += [str(f) for f in Glob(fsr2_dir + "/*.glsl")] - - # find all shader code(all glsl files excluding our include files) - glsl_files = [str(f) for f in Glob("*.glsl") if str(f) not in gl_include_files] - - # make sure we recompile shaders if include files change - env.Depends([f + ".gen.h" for f in glsl_files], gl_include_files + ["#glsl_builders.py"]) - - # compile shaders - for glsl_file in glsl_files: - env.RD_GLSL(glsl_file) diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl deleted file mode 100644 index 67fce9a34294..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_accumulate_pass.glsl +++ /dev/null @@ -1,8 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "../motion_vector_inc.glsl" -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl deleted file mode 100644 index d362958aa693..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_autogen_reactive_pass.glsl +++ /dev/null @@ -1,8 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "../motion_vector_inc.glsl" -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl deleted file mode 
100644 index 37504c2e530b..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_compute_luminance_pyramid_pass.glsl +++ /dev/null @@ -1,7 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl deleted file mode 100644 index 0ee08e4c76f1..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_depth_clip_pass.glsl +++ /dev/null @@ -1,8 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "../motion_vector_inc.glsl" -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl deleted file mode 100644 index 8c8430d4b1c3..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_lock_pass.glsl +++ /dev/null @@ -1,7 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl deleted file mode 100644 index 4120cfe64495..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_rcas_pass.glsl +++ /dev/null @@ -1,7 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl deleted file mode 100644 index f31abec215d5..000000000000 --- 
a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_reconstruct_previous_depth_pass.glsl +++ /dev/null @@ -1,8 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "../motion_vector_inc.glsl" -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl deleted file mode 100644 index 818374e43c9a..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr2/fsr2_tcr_autogen_pass.glsl +++ /dev/null @@ -1,8 +0,0 @@ -#[compute] - -#version 450 - -#VERSION_DEFINES - -#include "../motion_vector_inc.glsl" -#include "thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl" diff --git a/servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl b/servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl deleted file mode 100644 index 221e97becea7..000000000000 --- a/servers/rendering/renderer_rd/shaders/effects/fsr_upscale.glsl +++ /dev/null @@ -1,173 +0,0 @@ -/**************************************************************************/ -/* fsr_upscale.glsl */ -/**************************************************************************/ -/* This file is part of: */ -/* GODOT ENGINE */ -/* https://godotengine.org */ -/**************************************************************************/ -/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ -/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ -/* */ -/* Permission is hereby granted, free of charge, to any person obtaining */ -/* a copy of this software and associated documentation files (the */ -/* "Software"), to deal in the Software without restriction, including */ -/* without limitation the rights to use, copy, modify, merge, publish, */ -/* distribute, sublicense, and/or sell copies of the Software, and to */ -/* permit persons to whom the Software is furnished to do so, subject to */ -/* the following conditions: */ -/* */ -/* The above copyright notice and this permission notice shall be */ -/* included in all copies or substantial portions of the Software. */ -/* */ -/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ -/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ -/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ -/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ -/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ -/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ -/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ -/**************************************************************************/ - -#[compute] - -#version 450 - -#VERSION_DEFINES - -#define A_GPU -#define A_GLSL - -#ifdef MODE_FSR_UPSCALE_NORMAL - -#define A_HALF - -#endif - -#include "thirdparty/amd-fsr/ffx_a.h" - -layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; - -layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2D fsr_image; -layout(set = 0, binding = 0) uniform sampler2D source_image; - -#define FSR_UPSCALE_PASS_TYPE_EASU 0 -#define FSR_UPSCALE_PASS_TYPE_RCAS 1 - -layout(push_constant, std430) uniform Params { - float resolution_width; - float resolution_height; - float upscaled_width; - float upscaled_height; - float sharpness; - int pass; -} -params; - -AU4 Const0, Const1, Const2, Const3; - -#ifdef MODE_FSR_UPSCALE_FALLBACK - -#define FSR_EASU_F -AF4 FsrEasuRF(AF2 p) { - AF4 res = textureGather(source_image, p, 0); - return res; -} -AF4 FsrEasuGF(AF2 p) { - AF4 res = textureGather(source_image, p, 1); - return res; -} -AF4 FsrEasuBF(AF2 p) { - AF4 res = textureGather(source_image, p, 2); - return res; -} - -#define FSR_RCAS_F -AF4 FsrRcasLoadF(ASU2 p) { - return AF4(texelFetch(source_image, ASU2(p), 0)); -} -void FsrRcasInputF(inout AF1 r, inout AF1 g, inout AF1 b) {} - -#else - -#define FSR_EASU_H -AH4 FsrEasuRH(AF2 p) { - AH4 res = AH4(textureGather(source_image, p, 0)); - return res; -} -AH4 FsrEasuGH(AF2 p) { - AH4 res = AH4(textureGather(source_image, p, 1)); - return res; -} -AH4 FsrEasuBH(AF2 p) { - AH4 res = AH4(textureGather(source_image, p, 2)); - return res; -} - -#define FSR_RCAS_H -AH4 FsrRcasLoadH(ASW2 p) { - return AH4(texelFetch(source_image, ASU2(p), 0)); -} -void FsrRcasInputH(inout AH1 r, inout AH1 g, inout AH1 b) {} - -#endif - -#include "thirdparty/amd-fsr/ffx_fsr1.h" - -void fsr_easu_pass(AU2 pos) { -#ifdef MODE_FSR_UPSCALE_NORMAL - - AH3 Gamma2Color = AH3(0, 0, 0); - FsrEasuH(Gamma2Color, pos, Const0, Const1, Const2, Const3); - 
imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1)); - -#else - - AF3 Gamma2Color = AF3(0, 0, 0); - FsrEasuF(Gamma2Color, pos, Const0, Const1, Const2, Const3); - imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1)); - -#endif -} - -void fsr_rcas_pass(AU2 pos) { -#ifdef MODE_FSR_UPSCALE_NORMAL - - AH3 Gamma2Color = AH3(0, 0, 0); - FsrRcasH(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0); - imageStore(fsr_image, ASU2(pos), AH4(Gamma2Color, 1)); - -#else - - AF3 Gamma2Color = AF3(0, 0, 0); - FsrRcasF(Gamma2Color.r, Gamma2Color.g, Gamma2Color.b, pos, Const0); - imageStore(fsr_image, ASU2(pos), AF4(Gamma2Color, 1)); - -#endif -} - -void fsr_pass(AU2 pos) { - if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) { - fsr_easu_pass(pos); - } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) { - fsr_rcas_pass(pos); - } -} - -void main() { - // Clang does not like unused functions. If ffx_a.h is included in the binary, clang will throw a fit and not compile so we must configure FSR in this shader - if (params.pass == FSR_UPSCALE_PASS_TYPE_EASU) { - FsrEasuCon(Const0, Const1, Const2, Const3, params.resolution_width, params.resolution_height, params.resolution_width, params.resolution_height, params.upscaled_width, params.upscaled_height); - } else if (params.pass == FSR_UPSCALE_PASS_TYPE_RCAS) { - FsrRcasCon(Const0, params.sharpness); - } - - AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); - - fsr_pass(gxy); - gxy.x += 8u; - fsr_pass(gxy); - gxy.y += 8u; - fsr_pass(gxy); - gxy.x -= 8u; - fsr_pass(gxy); -} diff --git a/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl b/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl index cbf202653e78..c3174fa27f4a 100644 --- a/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/motion_vector_inc.glsl @@ -3,4 +3,4 @@ vec2 derive_motion_vector(vec2 uv, float depth, mat4 
reprojection_matrix) { return 0.5f + (previous_pos_ndc.xy / previous_pos_ndc.w) * 0.5f - uv; } -#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(i, j, k) derive_motion_vector(i, j, k) +#define FFX_FSR_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(i, j, k) derive_motion_vector(i, j, k) diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp index 9169111872c4..b6401769e25b 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp @@ -71,6 +71,7 @@ void RenderSceneBuffersRD::_bind_methods() { ClassDB::bind_method(D_METHOD("get_target_size"), &RenderSceneBuffersRD::get_target_size); ClassDB::bind_method(D_METHOD("get_scaling_3d_mode"), &RenderSceneBuffersRD::get_scaling_3d_mode); ClassDB::bind_method(D_METHOD("get_fsr_sharpness"), &RenderSceneBuffersRD::get_fsr_sharpness); + ClassDB::bind_method(D_METHOD("get_fsr_auto_generate_reactive"), &RenderSceneBuffersRD::get_fsr_auto_generate_reactive); ClassDB::bind_method(D_METHOD("get_msaa_3d"), &RenderSceneBuffersRD::get_msaa_3d); ClassDB::bind_method(D_METHOD("get_texture_samples"), &RenderSceneBuffersRD::get_texture_samples); ClassDB::bind_method(D_METHOD("get_screen_space_aa"), &RenderSceneBuffersRD::get_screen_space_aa); @@ -140,6 +141,11 @@ void RenderSceneBuffersRD::cleanup() { } } + if (fsr1_context) { + memdelete(fsr1_context); + fsr1_context = nullptr; + } + #ifdef METAL_ENABLED if (mfx_spatial_context) { memdelete(mfx_spatial_context); @@ -163,6 +169,7 @@ void RenderSceneBuffersRD::configure(const RenderSceneBuffersConfiguration *p_co screen_space_aa = p_config->get_screen_space_aa(); fsr_sharpness = p_config->get_fsr_sharpness(); + fsr_auto_generate_reactive = p_config->get_fsr_auto_generate_reactive(); texture_mipmap_bias = p_config->get_texture_mipmap_bias(); anisotropic_filtering_level = 
p_config->get_anisotropic_filtering_level(); use_taa = p_config->get_use_taa(); @@ -179,7 +186,12 @@ void RenderSceneBuffersRD::configure(const RenderSceneBuffersConfiguration *p_co // Create our color buffer. const bool resolve_target = msaa_3d != RS::VIEWPORT_MSAA_DISABLED; - create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR, get_base_data_format(), get_color_usage_bits(resolve_target, false, can_be_storage)); + uint32_t color_texture_usage_bits = get_color_usage_bits(resolve_target, false, can_be_storage); + if (fsr_auto_generate_reactive) { + // The color texture is copied from when recording the opaque-only color, so it needs the copy-from usage bit. + color_texture_usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + } + create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR, get_base_data_format(), color_texture_usage_bits); // TODO: Detect when it is safe to use RD::TEXTURE_USAGE_TRANSIENT_BIT for RB_TEX_DEPTH, RB_TEX_COLOR_MSAA and/or RB_TEX_DEPTH_MSAA. // (it means we cannot sample from it, we cannot copy from/to it) to save VRAM (and maybe performance too).
@@ -217,6 +229,7 @@ void RenderSceneBuffersRD::configure_for_reflections(const Size2i p_reflection_s render_target = RID(); scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_OFF; fsr_sharpness = 0.0; + fsr_auto_generate_reactive = false; msaa_3d = RS::VIEWPORT_MSAA_DISABLED; screen_space_aa = RS::VIEWPORT_SCREEN_SPACE_AA_DISABLED; use_taa = false; @@ -236,6 +249,10 @@ void RenderSceneBuffersRD::set_fsr_sharpness(float p_fsr_sharpness) { fsr_sharpness = p_fsr_sharpness; } +void RenderSceneBuffersRD::set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) { + fsr_auto_generate_reactive = p_fsr_auto_generate_reactive; +} + void RenderSceneBuffersRD::set_texture_mipmap_bias(float p_texture_mipmap_bias) { texture_mipmap_bias = p_texture_mipmap_bias; @@ -252,6 +269,22 @@ void RenderSceneBuffersRD::set_use_debanding(bool p_use_debanding) { use_debanding = p_use_debanding; } +void RenderSceneBuffersRD::ensure_fsr1(RendererRD::FSR1Effect *p_effect) { + if (fsr1_context) { + return; + } + + RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); + RenderingDevice *rd = RD::get_singleton(); + + // Determine the output format of the render target. 
+ RID dest = texture_storage->render_target_get_rd_texture(render_target); + RD::TextureFormat tf = rd->texture_get_format(dest); + RD::DataFormat output_format = tf.format; + + fsr1_context = p_effect->create_context(internal_size, target_size, output_format); +} + #ifdef METAL_ENABLED void RenderSceneBuffersRD::ensure_mfx(RendererRD::MFXSpatialEffect *p_effect) { if (mfx_spatial_context) { @@ -714,6 +747,13 @@ RID RenderSceneBuffersRD::get_velocity_depth_buffer() { return velocity_depth; } +// Opaque-only color +void RenderSceneBuffersRD::ensure_opaque_only_color_texture() { + if (!has_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY)) { + create_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY, get_base_data_format(), get_color_usage_bits(true, false, can_be_storage)); + } +} + uint32_t RenderSceneBuffersRD::get_color_usage_bits(bool p_resolve, bool p_msaa, bool p_storage) { DEV_ASSERT((!p_resolve && !p_msaa) || (p_resolve != p_msaa)); diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h index e2c2175e1cb5..7c5603215861 100644 --- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h +++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.h @@ -33,6 +33,7 @@ #ifdef METAL_ENABLED #include "../effects/metal_fx.h" #endif +#include "../effects/ffx/fsr1.h" #include "../effects/vrs.h" #include "core/templates/hash_map.h" #include "material_storage.h" @@ -52,6 +53,7 @@ #define RB_TEX_DEPTH_MSAA SNAME("depth_msaa") #define RB_TEX_VELOCITY SNAME("velocity") #define RB_TEX_VELOCITY_MSAA SNAME("velocity_msaa") +#define RB_TEX_COLOR_OPAQUE_ONLY SNAME("color_opaque_only") #define RB_TEX_BLUR_0 SNAME("blur_0") #define RB_TEX_BLUR_1 SNAME("blur_1") @@ -80,9 +82,11 @@ class RenderSceneBuffersRD : public RenderSceneBuffers { Size2i internal_size = Size2i(0, 0); RS::ViewportScaling3DMode scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_OFF; float 
fsr_sharpness = 0.2f; + bool fsr_auto_generate_reactive = false; float texture_mipmap_bias = 0.0f; RS::ViewportAnisotropicFiltering anisotropic_filtering_level = RS::VIEWPORT_ANISOTROPY_4X; + RendererRD::FSR1Context *fsr1_context = nullptr; #ifdef METAL_ENABLED RendererRD::MFXSpatialContext *mfx_spatial_context = nullptr; #endif @@ -197,10 +201,14 @@ class RenderSceneBuffersRD : public RenderSceneBuffers { virtual void configure(const RenderSceneBuffersConfiguration *p_config) override; void configure_for_reflections(const Size2i p_reflection_size); virtual void set_fsr_sharpness(float p_fsr_sharpness) override; + virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) override; virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override; virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) override; virtual void set_use_debanding(bool p_use_debanding) override; + void ensure_fsr1(RendererRD::FSR1Effect *p_effect); + _FORCE_INLINE_ RendererRD::FSR1Context *get_fsr1_context() const { return fsr1_context; } + #ifdef METAL_ENABLED void ensure_mfx(RendererRD::MFXSpatialEffect *p_effect); _FORCE_INLINE_ RendererRD::MFXSpatialContext *get_mfx_spatial_context() const { return mfx_spatial_context; } @@ -236,6 +244,7 @@ class RenderSceneBuffersRD : public RenderSceneBuffers { _FORCE_INLINE_ Size2i get_target_size() const { return target_size; } _FORCE_INLINE_ RS::ViewportScaling3DMode get_scaling_3d_mode() const { return scaling_3d_mode; } _FORCE_INLINE_ float get_fsr_sharpness() const { return fsr_sharpness; } + _FORCE_INLINE_ bool get_fsr_auto_generate_reactive() const { return fsr_auto_generate_reactive; } _FORCE_INLINE_ RS::ViewportMSAA get_msaa_3d() const { return msaa_3d; } _FORCE_INLINE_ RD::TextureSamples get_texture_samples() const { return texture_samples; } _FORCE_INLINE_ RS::ViewportScreenSpaceAA get_screen_space_aa() const { return screen_space_aa; } @@ -307,7 +316,7 
@@ class RenderSceneBuffersRD : public RenderSceneBuffers { return get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_COLOR_UPSCALED, p_layer, 0); } - // Velocity, currently only used by TAA (Clustered) but we'll be using this in other places soon too. + // Velocity, used by TAA and FSR. void ensure_velocity(); bool has_velocity_buffer(bool p_has_msaa); @@ -316,6 +325,19 @@ class RenderSceneBuffersRD : public RenderSceneBuffers { RID get_velocity_depth_buffer(); + // Opaque-only color buffer, used for FSR2/3 + + void ensure_opaque_only_color_texture(); + _FORCE_INLINE_ bool has_opaque_only_color_texture() const { + return has_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY); + } + _FORCE_INLINE_ RID get_opaque_only_color_texture() const { + return get_texture(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY); + } + _FORCE_INLINE_ RID get_opaque_only_color_texture(const uint32_t p_layer) { + return get_texture_slice(RB_SCOPE_BUFFERS, RB_TEX_COLOR_OPAQUE_ONLY, p_layer, 0); + } + // Samplers adjusted with the mipmap bias that is best fit for the configuration of these render buffers. _FORCE_INLINE_ RendererRD::MaterialStorage::Samplers get_samplers() const { diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp index c622decaa657..65a4c89f4d2b 100644 --- a/servers/rendering/renderer_viewport.cpp +++ b/servers/rendering/renderer_viewport.cpp @@ -166,7 +166,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) { } if (scaling_3d_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL && !RD::get_singleton()->has_feature(RD::SUPPORTS_METALFX_SPATIAL)) { - scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_FSR; + scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_FSR1; WARN_PRINT_ONCE("MetalFX spatial upscaling is not supported by the current renderer or hardware. 
Falling back to FSR scaling."); } @@ -207,7 +207,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) { if (use_taa && (scaling_type == RS::VIEWPORT_SCALING_3D_TYPE_TEMPORAL)) { // Temporal upscalers can't be used with TAA. // Turn it off and prefer using the temporal upscaler. - WARN_PRINT_ONCE("FSR 2 or MetalFX Temporal is not compatible with TAA. Disabling TAA internally."); + WARN_PRINT_ONCE("FSR 2/3 or MetalFX Temporal is not compatible with TAA. Disabling TAA internally."); use_taa = false; } @@ -227,8 +227,9 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) { break; case RS::VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL: case RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL: - case RS::VIEWPORT_SCALING_3D_MODE_FSR: + case RS::VIEWPORT_SCALING_3D_MODE_FSR1: case RS::VIEWPORT_SCALING_3D_MODE_FSR2: + case RS::VIEWPORT_SCALING_3D_MODE_FSR3: target_width = p_viewport->size.width; target_height = p_viewport->size.height; render_width = MAX(target_width * scaling_3d_scale, 1.0); // target_width / (target_width * scaling) @@ -278,6 +279,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) { rb_config.set_msaa_3d(msaa_3d); rb_config.set_screen_space_aa(p_viewport->screen_space_aa); rb_config.set_fsr_sharpness(p_viewport->fsr_sharpness); + rb_config.set_fsr_auto_generate_reactive(p_viewport->fsr_auto_generate_reactive); rb_config.set_texture_mipmap_bias(texture_mipmap_bias); rb_config.set_anisotropic_filtering_level(p_viewport->anisotropic_filtering_level); rb_config.set_use_taa(use_taa); @@ -989,8 +991,9 @@ void RendererViewport::viewport_set_scaling_3d_mode(RID p_viewport, RS::Viewport ERR_FAIL_NULL(viewport); const String rendering_method = OS::get_singleton()->get_current_rendering_method(); if (rendering_method != "forward_plus") { - ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR, "FSR1 is only available when using the Forward+ renderer."); + ERR_FAIL_COND_EDMSG(p_mode == 
RS::VIEWPORT_SCALING_3D_MODE_FSR1, "FSR1 is only available when using the Forward+ renderer."); ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR2, "FSR2 is only available when using the Forward+ renderer."); + ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_FSR3, "FSR3 is only available when using the Forward+ renderer."); ERR_FAIL_COND_EDMSG(p_mode == RS::VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL, "MetalFX Temporal is only available when using the Forward+ renderer."); } if (rendering_method == "gl_compatibility") { @@ -1020,6 +1023,14 @@ void RendererViewport::viewport_set_fsr_sharpness(RID p_viewport, float p_sharpn _configure_3d_render_buffers(viewport); } +void RendererViewport::viewport_set_fsr_auto_generate_reactive(RID p_viewport, bool p_fsr_auto_generate_reactive) { + Viewport *viewport = viewport_owner.get_or_null(p_viewport); + ERR_FAIL_NULL(viewport); + + viewport->fsr_auto_generate_reactive = p_fsr_auto_generate_reactive; + _configure_3d_render_buffers(viewport); +} + void RendererViewport::viewport_set_texture_mipmap_bias(RID p_viewport, float p_mipmap_bias) { Viewport *viewport = viewport_owner.get_or_null(p_viewport); ERR_FAIL_NULL(viewport); diff --git a/servers/rendering/renderer_viewport.h b/servers/rendering/renderer_viewport.h index fa506be98cd8..087691844f3b 100644 --- a/servers/rendering/renderer_viewport.h +++ b/servers/rendering/renderer_viewport.h @@ -57,6 +57,7 @@ class RendererViewport { RS::ViewportScaling3DMode scaling_3d_mode = RenderingServer::VIEWPORT_SCALING_3D_MODE_BILINEAR; float scaling_3d_scale = 1.0; float fsr_sharpness = 0.2f; + bool fsr_auto_generate_reactive = false; float texture_mipmap_bias = 0.0f; RS::ViewportAnisotropicFiltering anisotropic_filtering_level = RenderingServer::VIEWPORT_ANISOTROPY_4X; bool fsr_enabled = false; @@ -231,6 +232,7 @@ class RendererViewport { void viewport_set_scaling_3d_mode(RID p_viewport, RS::ViewportScaling3DMode p_mode); void viewport_set_scaling_3d_scale(RID 
p_viewport, float p_scaling_3d_scale); void viewport_set_fsr_sharpness(RID p_viewport, float p_sharpness); + void viewport_set_fsr_auto_generate_reactive(RID p_viewport, bool p_auto_generate_reactive); void viewport_set_texture_mipmap_bias(RID p_viewport, float p_mipmap_bias); void viewport_set_anisotropic_filtering_level(RID p_viewport, RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 0ed6c0ff6280..7f55ceeb5146 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -40,6 +40,7 @@ #include "core/profiling/profiling.h" #include "core/templates/fixed_vector.h" #include "modules/modules_enabled.gen.h" +#include "renderer_rd/effects/ffx/ffx_common.h" #include "servers/rendering/rendering_shader_container.h" #ifdef MODULE_GLSLANG_ENABLED diff --git a/servers/rendering/rendering_server.cpp b/servers/rendering/rendering_server.cpp index 62f13bfc975c..aa49b19de8e3 100644 --- a/servers/rendering/rendering_server.cpp +++ b/servers/rendering/rendering_server.cpp @@ -2894,6 +2894,7 @@ void RenderingServer::_bind_methods() { ClassDB::bind_method(D_METHOD("viewport_set_scaling_3d_mode", "viewport", "scaling_3d_mode"), &RenderingServer::viewport_set_scaling_3d_mode); ClassDB::bind_method(D_METHOD("viewport_set_scaling_3d_scale", "viewport", "scale"), &RenderingServer::viewport_set_scaling_3d_scale); ClassDB::bind_method(D_METHOD("viewport_set_fsr_sharpness", "viewport", "sharpness"), &RenderingServer::viewport_set_fsr_sharpness); + ClassDB::bind_method(D_METHOD("viewport_set_fsr_auto_generate_reactive", "viewport", "fsr_auto_generate_reactive"), &RenderingServer::viewport_set_fsr_auto_generate_reactive); ClassDB::bind_method(D_METHOD("viewport_set_texture_mipmap_bias", "viewport", "mipmap_bias"), &RenderingServer::viewport_set_texture_mipmap_bias); ClassDB::bind_method(D_METHOD("viewport_set_anisotropic_filtering_level", 
"viewport", "anisotropic_filtering_level"), &RenderingServer::viewport_set_anisotropic_filtering_level); ClassDB::bind_method(D_METHOD("viewport_set_update_mode", "viewport", "update_mode"), &RenderingServer::viewport_set_update_mode); @@ -2947,8 +2948,9 @@ void RenderingServer::_bind_methods() { ClassDB::bind_method(D_METHOD("viewport_set_vrs_texture", "viewport", "texture"), &RenderingServer::viewport_set_vrs_texture); BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_BILINEAR); - BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR); + BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR1); BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR2); + BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_FSR3); BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL); BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL); BIND_ENUM_CONSTANT(VIEWPORT_SCALING_3D_MODE_MAX); @@ -3784,7 +3786,7 @@ void RenderingServer::init() { String mode_hints; String mode_hints_metal; { - Vector mode_hints_arr = { "Bilinear (Fastest)", "FSR 1.0 (Fast)", "FSR 2.2 (Slow)" }; + Vector mode_hints_arr = { "Bilinear (Fastest)", "FSR 1.2 (Fast)", "FSR 2.3 (Slow)", "FSR 3.1 (Slow)" }; mode_hints = String(",").join(mode_hints_arr); mode_hints_arr.push_back("MetalFX (Spatial)"); @@ -3798,6 +3800,7 @@ void RenderingServer::init() { } GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/scaling_3d/scale", PROPERTY_HINT_RANGE, "0.25,2.0,0.01"), 1.0); GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/scaling_3d/fsr_sharpness", PROPERTY_HINT_RANGE, "0,2,0.1"), 0.2f); + GLOBAL_DEF(PropertyInfo(Variant::BOOL, "rendering/scaling_3d/fsr_auto_generate_reactive"), false); GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/textures/default_filters/texture_mipmap_bias", PROPERTY_HINT_RANGE, "-2,2,0.001"), 0.0f); GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/textures/decals/filter", PROPERTY_HINT_ENUM, "Nearest (Fast),Linear (Fast),Nearest Mipmap (Fast),Linear Mipmap (Fast),Nearest Mipmap Anisotropic (Average),Linear Mipmap 
Anisotropic (Average)"), DECAL_FILTER_LINEAR_MIPMAPS); diff --git a/servers/rendering/rendering_server.h b/servers/rendering/rendering_server.h index 23f4b9d0cef6..4949478954a2 100644 --- a/servers/rendering/rendering_server.h +++ b/servers/rendering/rendering_server.h @@ -948,8 +948,9 @@ class RenderingServer : public Object { enum ViewportScaling3DMode { VIEWPORT_SCALING_3D_MODE_BILINEAR, - VIEWPORT_SCALING_3D_MODE_FSR, + VIEWPORT_SCALING_3D_MODE_FSR1, VIEWPORT_SCALING_3D_MODE_FSR2, + VIEWPORT_SCALING_3D_MODE_FSR3, VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL, VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL, VIEWPORT_SCALING_3D_MODE_MAX, @@ -973,9 +974,9 @@ class RenderingServer : public Object { }; _ALWAYS_INLINE_ static ViewportScaling3DType scaling_3d_mode_type(ViewportScaling3DMode p_mode) { - if (p_mode == VIEWPORT_SCALING_3D_MODE_BILINEAR || p_mode == VIEWPORT_SCALING_3D_MODE_FSR || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL) { + if (p_mode == VIEWPORT_SCALING_3D_MODE_BILINEAR || p_mode == VIEWPORT_SCALING_3D_MODE_FSR1 || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_SPATIAL) { return VIEWPORT_SCALING_3D_TYPE_SPATIAL; - } else if (p_mode == VIEWPORT_SCALING_3D_MODE_FSR2 || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL) { + } else if (p_mode == VIEWPORT_SCALING_3D_MODE_FSR2 || p_mode == VIEWPORT_SCALING_3D_MODE_FSR3 || p_mode == VIEWPORT_SCALING_3D_MODE_METALFX_TEMPORAL) { return VIEWPORT_SCALING_3D_TYPE_TEMPORAL; } return VIEWPORT_SCALING_3D_TYPE_NONE; @@ -996,6 +997,7 @@ class RenderingServer : public Object { virtual void viewport_set_scaling_3d_mode(RID p_viewport, ViewportScaling3DMode p_scaling_3d_mode) = 0; virtual void viewport_set_scaling_3d_scale(RID p_viewport, float p_scaling_3d_scale) = 0; virtual void viewport_set_fsr_sharpness(RID p_viewport, float p_fsr_sharpness) = 0; + virtual void viewport_set_fsr_auto_generate_reactive(RID p_viewport, bool p_fsr_auto_generate_reactive) = 0; virtual void viewport_set_texture_mipmap_bias(RID p_viewport, 
float p_texture_mipmap_bias) = 0; virtual void viewport_set_anisotropic_filtering_level(RID p_viewport, ViewportAnisotropicFiltering p_anisotropic_filtering_level) = 0; diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h index 34faef662c41..9c6d562b5486 100644 --- a/servers/rendering/rendering_server_default.h +++ b/servers/rendering/rendering_server_default.h @@ -711,6 +711,7 @@ class RenderingServerDefault : public RenderingServer { FUNC2(viewport_set_scaling_3d_mode, RID, ViewportScaling3DMode) FUNC2(viewport_set_scaling_3d_scale, RID, float) FUNC2(viewport_set_fsr_sharpness, RID, float) + FUNC2(viewport_set_fsr_auto_generate_reactive, RID, bool) FUNC2(viewport_set_texture_mipmap_bias, RID, float) FUNC2(viewport_set_anisotropic_filtering_level, RID, ViewportAnisotropicFiltering) diff --git a/servers/rendering/storage/render_scene_buffers.cpp b/servers/rendering/storage/render_scene_buffers.cpp index 67135a4c4b5f..ccbf665bae14 100644 --- a/servers/rendering/storage/render_scene_buffers.cpp +++ b/servers/rendering/storage/render_scene_buffers.cpp @@ -49,7 +49,7 @@ void RenderSceneBuffersConfiguration::_bind_methods() { ClassDB::bind_method(D_METHOD("get_scaling_3d_mode"), &RenderSceneBuffersConfiguration::get_scaling_3d_mode); ClassDB::bind_method(D_METHOD("set_scaling_3d_mode", "scaling_3d_mode"), &RenderSceneBuffersConfiguration::set_scaling_3d_mode); - ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.0 (Fast),FSR 2.2 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); // TODO VIEWPORT_SCALING_3D_MODE_OFF is possible here too, but we can't specify an enum string for it. 
+ ADD_PROPERTY(PropertyInfo(Variant::INT, "scaling_3d_mode", PROPERTY_HINT_ENUM, "Bilinear (Fastest),FSR 1.2 (Fast),FSR 2.3 (Slow),FSR 3.1 (Slow),MetalFX (Spatial),MetalFX (Temporal)"), "set_scaling_3d_mode", "get_scaling_3d_mode"); // TODO VIEWPORT_SCALING_3D_MODE_OFF is possible here too, but we can't specify an enum string for it. ClassDB::bind_method(D_METHOD("get_msaa_3d"), &RenderSceneBuffersConfiguration::get_msaa_3d); ClassDB::bind_method(D_METHOD("set_msaa_3d", "msaa_3d"), &RenderSceneBuffersConfiguration::set_msaa_3d); @@ -63,6 +63,10 @@ void RenderSceneBuffersConfiguration::_bind_methods() { ClassDB::bind_method(D_METHOD("set_fsr_sharpness", "fsr_sharpness"), &RenderSceneBuffersConfiguration::set_fsr_sharpness); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fsr_sharpness"), "set_fsr_sharpness", "get_fsr_sharpness"); + ClassDB::bind_method(D_METHOD("get_fsr_auto_generate_reactive"), &RenderSceneBuffersConfiguration::get_fsr_auto_generate_reactive); + ClassDB::bind_method(D_METHOD("set_fsr_auto_generate_reactive", "fsr_auto_generate_reactive"), &RenderSceneBuffersConfiguration::set_fsr_auto_generate_reactive); + ADD_PROPERTY(PropertyInfo(Variant::BOOL, "fsr_auto_generate_reactive"), "set_fsr_auto_generate_reactive", "get_fsr_auto_generate_reactive"); + ClassDB::bind_method(D_METHOD("get_texture_mipmap_bias"), &RenderSceneBuffersConfiguration::get_texture_mipmap_bias); ClassDB::bind_method(D_METHOD("set_texture_mipmap_bias", "texture_mipmap_bias"), &RenderSceneBuffersConfiguration::set_texture_mipmap_bias); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "texture_mipmap_bias"), "set_texture_mipmap_bias", "get_texture_mipmap_bias"); @@ -79,6 +83,7 @@ void RenderSceneBuffers::_bind_methods() { void RenderSceneBuffersExtension::_bind_methods() { GDVIRTUAL_BIND(_configure, "config"); GDVIRTUAL_BIND(_set_fsr_sharpness, "fsr_sharpness"); + GDVIRTUAL_BIND(_set_fsr_auto_generate_reactive, "fsr_auto_generate_reactive"); GDVIRTUAL_BIND(_set_texture_mipmap_bias, 
"texture_mipmap_bias"); GDVIRTUAL_BIND(_set_anisotropic_filtering_level, "anisotropic_filtering_level"); GDVIRTUAL_BIND(_set_use_debanding, "use_debanding"); @@ -92,6 +97,10 @@ void RenderSceneBuffersExtension::set_fsr_sharpness(float p_fsr_sharpness) { GDVIRTUAL_CALL(_set_fsr_sharpness, p_fsr_sharpness); } +void RenderSceneBuffersExtension::set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) { + GDVIRTUAL_CALL(_set_fsr_auto_generate_reactive, p_fsr_auto_generate_reactive); +} + void RenderSceneBuffersExtension::set_texture_mipmap_bias(float p_texture_mipmap_bias) { GDVIRTUAL_CALL(_set_texture_mipmap_bias, p_texture_mipmap_bias); } diff --git a/servers/rendering/storage/render_scene_buffers.h b/servers/rendering/storage/render_scene_buffers.h index fead81c6256f..fcc8b37690ba 100644 --- a/servers/rendering/storage/render_scene_buffers.h +++ b/servers/rendering/storage/render_scene_buffers.h @@ -49,6 +49,7 @@ class RenderSceneBuffersConfiguration : public RefCounted { RS::ViewportAnisotropicFiltering anisotropic_filtering_level = RS::VIEWPORT_ANISOTROPY_4X; float fsr_sharpness = 0.0; + bool fsr_auto_generate_reactive = false; float texture_mipmap_bias = 0.0; bool use_taa = false; bool use_debanding = false; @@ -81,6 +82,9 @@ class RenderSceneBuffersConfiguration : public RefCounted { float get_fsr_sharpness() const { return fsr_sharpness; } void set_fsr_sharpness(float p_fsr_sharpness) { fsr_sharpness = p_fsr_sharpness; } + bool get_fsr_auto_generate_reactive() const { return fsr_auto_generate_reactive; } + void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) { fsr_auto_generate_reactive = p_fsr_auto_generate_reactive; } + float get_texture_mipmap_bias() const { return texture_mipmap_bias; } void set_texture_mipmap_bias(float p_texture_mipmap_bias) { texture_mipmap_bias = p_texture_mipmap_bias; } @@ -111,6 +115,7 @@ class RenderSceneBuffers : public RefCounted { // for those settings that are unlikely to require buffers to be 
recreated, we'll add setters virtual void set_fsr_sharpness(float p_fsr_sharpness) = 0; + virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) = 0; virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) = 0; virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) = 0; virtual void set_use_debanding(bool p_use_debanding) = 0; @@ -124,6 +129,7 @@ class RenderSceneBuffersExtension : public RenderSceneBuffers { GDVIRTUAL1(_configure, const RenderSceneBuffersConfiguration *) GDVIRTUAL1(_set_fsr_sharpness, float) + GDVIRTUAL1(_set_fsr_auto_generate_reactive, bool) GDVIRTUAL1(_set_texture_mipmap_bias, float) GDVIRTUAL1(_set_anisotropic_filtering_level, int) GDVIRTUAL1(_set_use_debanding, bool) @@ -134,6 +140,7 @@ class RenderSceneBuffersExtension : public RenderSceneBuffers { virtual void configure(const RenderSceneBuffersConfiguration *p_config) override; virtual void set_fsr_sharpness(float p_fsr_sharpness) override; + virtual void set_fsr_auto_generate_reactive(bool p_fsr_auto_generate_reactive) override; virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override; virtual void set_anisotropic_filtering_level(RS::ViewportAnisotropicFiltering p_anisotropic_filtering_level) override; virtual void set_use_debanding(bool p_use_debanding) override; diff --git a/thirdparty/README.md b/thirdparty/README.md index dc7524be8d37..b1af40953328 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -17,34 +17,20 @@ Files extracted from upstream source: - `LICENSE-MIT` -## amd-fsr +## amd-ffx -- Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR -- Version: 1.0.2 (a21ffb8f6c13233ba336352bdff293894c706575, 2021) +- Upstream: https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK +- Version: 1.1.4 (c6efa6bf7f2027b3ec94f28578bb5965eabb9e55, 2025) - License: MIT Files extracted from upstream source: -- `ffx_a.h` and `ffx_fsr1.h` from `ffx-fsr` -- `license.txt` - 
- -## amd-fsr2 - -- Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR2 -- Version: 2.2.1 (1680d1edd5c034f88ebbbb793d8b88f8842cf804, 2023) -- License: MIT - -Files extracted from upstream source: - -- `ffx_*.cpp` and `ffx_*.h` from `src/ffx-fsr2-api` -- `shaders` folder from `src/ffx-fsr2-api` with `ffx_*.hlsl` files excluded -- `LICENSE.txt` - -Patches: - -- `0001-build-fixes.patch` (GH-81197) -- `0002-godot-fsr2-options.patch` (GH-81197) +- `ffx_*.h` from `sdk/include/FidelityFX/host` +- `ffx_message.cpp`, `ffx_assert.cpp`, `ffx_object_management.cpp` and `ffx_object_management.h` from `sdk/src/shared` +- `ffx_*.cpp` and `ffx_*_private.h` from `sdk/src/components/*` +- `gpu` folder from `sdk/include/FidelityFX/gpu` +- `shaders` folder from `sdk/src/backends/vk/shaders` +- `license.md` ## angle diff --git a/thirdparty/amd-fsr2/ffx_assert.cpp b/thirdparty/amd-ffx/ffx_assert.cpp similarity index 83% rename from thirdparty/amd-fsr2/ffx_assert.cpp rename to thirdparty/amd-ffx/ffx_assert.cpp index 8a70ad501a12..4ba09b26f343 100644 --- a/thirdparty/amd-fsr2/ffx_assert.cpp +++ b/thirdparty/amd-ffx/ffx_assert.cpp @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -49,8 +50,8 @@ bool ffxAssertReport(const char* file, int32_t line, const char* condition, cons #ifdef _WIN32 // form the final assertion string and output to the TTY. - const size_t bufferSize = static_cast(snprintf(nullptr, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, message ? message : condition)) + 1; - char* tempBuf = static_cast(malloc(bufferSize)); + const size_t bufferSize = snprintf(NULL, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, message ? message : condition) + 1; + char* tempBuf = (char*)malloc(bufferSize); if (!tempBuf) { return true; diff --git a/thirdparty/amd-fsr2/ffx_assert.h b/thirdparty/amd-ffx/ffx_assert.h similarity index 90% rename from thirdparty/amd-fsr2/ffx_assert.h rename to thirdparty/amd-ffx/ffx_assert.h index ae32d2a73345..b0df5af2813d 100644 --- a/thirdparty/amd-fsr2/ffx_assert.h +++ b/thirdparty/amd-ffx/ffx_assert.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -28,6 +29,11 @@ extern "C" { #endif // #ifdef __cplusplus +/// @defgroup Asserts Asserts +/// Asserts used by FidelityFX SDK functions +/// +/// @ingroup ffxHost + #ifdef _DEBUG #ifdef _WIN32 @@ -37,6 +43,8 @@ extern "C" { } #else /// Macro to force the debugger to break at this point in the code. +/// +/// @ingroup Asserts #define FFX_DEBUG_BREAK __debugbreak(); #endif #else @@ -58,6 +66,7 @@ extern "C" { /// /// @param [in] message The message generated by the assert. /// +/// @ingroup Asserts typedef void (*FfxAssertCallback)(const char* message); /// Function to report an assert. @@ -70,16 +79,20 @@ typedef void (*FfxAssertCallback)(const char* message); /// @returns /// Always returns true. /// +/// @ingroup Asserts FFX_API bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* msg); /// Provides the ability to set a callback for assert messages. /// /// @param [in] callback The callback function that will receive assert messages. /// +/// @ingroup Asserts FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback); #ifdef _DEBUG /// Standard assert macro. 
+/// +/// @ingroup Asserts #define FFX_ASSERT(condition) \ do \ { \ @@ -88,6 +101,8 @@ FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback); } while (0) /// Assert macro with message. +/// +/// @ingroup Asserts #define FFX_ASSERT_MESSAGE(condition, msg) \ do \ { \ @@ -96,6 +111,8 @@ FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback); } while (0) /// Assert macro that always fails. +/// +/// @ingroup Asserts #define FFX_ASSERT_FAIL(message) \ do \ { \ @@ -125,6 +142,8 @@ FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback); #endif // #if _DEBUG /// Simple static assert. +/// +/// @ingroup Asserts #define FFX_STATIC_ASSERT(condition) static_assert(condition, #condition) #ifdef __cplusplus diff --git a/thirdparty/amd-ffx/ffx_error.h b/thirdparty/amd-ffx/ffx_error.h new file mode 100644 index 000000000000..40599719b7a0 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_error.h @@ -0,0 +1,79 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "ffx_types.h" + +/// @defgroup Errors Error Codes +/// Error codes returned from FidelityFX SDK functions +/// +/// @ingroup ffxHost + +/// Typedef for error codes returned from functions in the FidelityFX SDK. +/// +/// @ingroup Errors +typedef int32_t FfxErrorCode; + +/// Error codes and their meaning +/// +/// @ingroup Errors +typedef enum FfxErrorCodes { + + FFX_OK = 0, ///< The operation completed successfully. + FFX_ERROR_INVALID_POINTER = 0x80000000, ///< The operation failed due to an invalid pointer. + FFX_ERROR_INVALID_ALIGNMENT = 0x80000001, ///< The operation failed due to an invalid alignment. + FFX_ERROR_INVALID_SIZE = 0x80000002, ///< The operation failed due to an invalid size. + FFX_EOF = 0x80000003, ///< The end of the file was encountered. + FFX_ERROR_INVALID_PATH = 0x80000004, ///< The operation failed because the specified path was invalid. + FFX_ERROR_EOF = 0x80000005, ///< The operation failed because end of file was reached. + FFX_ERROR_MALFORMED_DATA = 0x80000006, ///< The operation failed because of some malformed data. + FFX_ERROR_OUT_OF_MEMORY = 0x80000007, ///< The operation failed because it ran out memory. + FFX_ERROR_INCOMPLETE_INTERFACE = 0x80000008, ///< The operation failed because the interface was not fully configured. + FFX_ERROR_INVALID_ENUM = 0x80000009, ///< The operation failed because of an invalid enumeration value. + FFX_ERROR_INVALID_ARGUMENT = 0x8000000a, ///< The operation failed because an argument was invalid. + FFX_ERROR_OUT_OF_RANGE = 0x8000000b, ///< The operation failed because a value was out of range. + FFX_ERROR_NULL_DEVICE = 0x8000000c, ///< The operation failed because a device was null. 
+ FFX_ERROR_BACKEND_API_ERROR = 0x8000000d, ///< The operation failed because the backend API returned an error code. + FFX_ERROR_INSUFFICIENT_MEMORY = 0x8000000e, ///< The operation failed because there was not enough memory. + FFX_ERROR_INVALID_VERSION = 0x8000000f, ///< The operation failed because the wrong backend was linked. + FFX_ERROR_ACCESS_DENIED = 0x80000010, ///< The operation failed because access to the resource was denied. + +}FfxErrorCodes; + +/// Helper macro to return error code y from a function when a specific condition, x, is not met. +/// +/// @ingroup Errors +#define FFX_RETURN_ON_ERROR(x, y) \ + if (!(x)) \ + { \ + return (y); \ + } + +/// Helper macro to return error code x from a function when it is not FFX_OK. +/// +/// @ingroup Errors +#define FFX_VALIDATE(x) \ + { \ + FfxErrorCode ret = x; \ + FFX_RETURN_ON_ERROR(ret == FFX_OK, ret); \ + } diff --git a/thirdparty/amd-ffx/ffx_frameinterpolation.cpp b/thirdparty/amd-ffx/ffx_frameinterpolation.cpp new file mode 100644 index 000000000000..ec13f7aed7d2 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_frameinterpolation.cpp @@ -0,0 +1,1265 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include // for max used inside SPD CPU code. +#include // for fabs, abs, sinf, sqrt, etc. +#include // for memset +#include // for FLT_EPSILON +#include "ffx_frameinterpolation.h" + +#define FFX_CPU + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wsign-compare" +#endif + +#include "gpu/ffx_core.h" +#include "gpu/spd/ffx_spd.h" +#include "ffx_object_management.h" + +#include "ffx_frameinterpolation_private.h" + +// lists to map shader resource bindpoint name to resource identifier +typedef struct ResourceBinding +{ + uint32_t index; + wchar_t name[64]; +}ResourceBinding; + +static const ResourceBinding srvResourceBindingTable[] = +{ + // Frame Interpolation textures + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH, L"r_input_depth"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS, L"r_input_motion_vectors"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD, L"r_input_distortion_field"}, + + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"r_dilated_depth"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"r_dilated_motion_vectors"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, L"r_reconstructed_depth_previous_frame"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, L"r_reconstructed_depth_interpolated_frame"}, + 
{FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE, L"r_previous_interpolation_source"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE, L"r_current_interpolation_source"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, L"r_disocclusion_mask"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, L"r_game_motion_vector_field_x"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, L"r_game_motion_vector_field_y"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, L"r_optical_flow_motion_vector_field_x"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, L"r_optical_flow_motion_vector_field_y"}, + + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR, L"r_optical_flow"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE, L"r_optical_flow_confidence"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION, L"r_optical_flow_global_motion"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION, L"r_optical_flow_scd"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT, L"r_output"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK, L"r_inpainting_mask"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID, L"r_inpainting_pyramid"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER, L"r_present_backbuffer"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, L"r_counters"}, +}; + +static const ResourceBinding uavResourceBindingTable[] = +{ + // Frame Interpolation textures + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"rw_dilated_depth"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"rw_dilated_motion_vectors"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, 
L"rw_reconstructed_depth_previous_frame"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, L"rw_reconstructed_depth_interpolated_frame"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT, L"rw_output"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, L"rw_disocclusion_mask"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, L"rw_game_motion_vector_field_x"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, L"rw_game_motion_vector_field_y"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, L"rw_optical_flow_motion_vector_field_x"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, L"rw_optical_flow_motion_vector_field_y"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK, L"rw_inpainting_mask"}, + + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, L"rw_counters"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0, L"rw_inpainting_pyramid0"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_1, L"rw_inpainting_pyramid1"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_2, L"rw_inpainting_pyramid2"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_3, L"rw_inpainting_pyramid3"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_4, L"rw_inpainting_pyramid4"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_5, L"rw_inpainting_pyramid5"}, // extra declaration, as this is globallycoherent + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_6, L"rw_inpainting_pyramid6"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_7, L"rw_inpainting_pyramid7"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_8, L"rw_inpainting_pyramid8"}, + 
{FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_9, L"rw_inpainting_pyramid9"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_10, L"rw_inpainting_pyramid10"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_11, L"rw_inpainting_pyramid11"}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_12, L"rw_inpainting_pyramid12"}, +}; + +static const ResourceBinding cbResourceBindingTable[] = +{ + {FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER, L"cbFI"}, + {FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER, L"cbInpaintingPyramid"}, +}; + +// Broad structure of the root signature. +typedef enum FrameInterpolationRootSignatureLayout { + + FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_UAVS, + FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_SRVS, + FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_CONSTANTS, + FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1, + FRAMEINTERPOLATION_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT +} FrameInterpolationRootSignatureLayout; + +typedef union FrameInterpolationSecondaryUnion +{ + InpaintingPyramidConstants inpaintingPyramidConstants; +} FrameInterpolationSecondaryUnion; + +// Lanczos +static float lanczos2(float value) +{ + return abs(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); +} + +// Calculate halton number for index and base. 
+static float halton(int32_t index, int32_t base) +{ + float f = 1.0f, result = 0.0f; + + for (int32_t currentIndex = index; currentIndex > 0;) { + + f /= (float)base; + result = result + f * (float)(currentIndex % base); + currentIndex = (uint32_t)(floorf((float)(currentIndex) / (float)(base))); + } + + return result; +} + +static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) +{ + for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex) + { + if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvTextureBindings[srvIndex].name)) + break; + } + if (mapIndex == _countof(srvResourceBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = srvResourceBindingTable[mapIndex].index; + } + + // check for UAVs where mip chains are to be bound + for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex) + { + if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavTextureBindings[uavIndex].name)) + break; + } + if (mapIndex == _countof(uavResourceBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = uavResourceBindingTable[mapIndex].index; + } + + for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex) + { + if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->constantBufferBindings[cbIndex].name)) + break; + } + if (mapIndex == _countof(cbResourceBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = 
cbResourceBindingTable[mapIndex].index; + } + + for (uint32_t uavBufferIndex = 0; uavBufferIndex < inoutPipeline->uavBufferCount; ++uavBufferIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex) + { + if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavBufferBindings[uavBufferIndex].name)) + break; + } + if (mapIndex == _countof(uavResourceBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->uavBufferBindings[uavBufferIndex].resourceIdentifier = uavResourceBindingTable[mapIndex].index; + } + + for (uint32_t srvBufferIndex = 0; srvBufferIndex < inoutPipeline->srvBufferCount; ++srvBufferIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex) + { + if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvBufferBindings[srvBufferIndex].name)) + break; + } + if (mapIndex == _countof(srvResourceBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->srvBufferBindings[srvBufferIndex].resourceIdentifier = srvResourceBindingTable[mapIndex].index; + } + + + return FFX_OK; +} + +static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxPass, bool fp16, bool force64, bool) +{ + // work out what permutation to load. + uint32_t flags = 0; + flags |= (contextFlags & FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? 0 : FRAMEINTERPOLATION_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS; + flags |= (contextFlags & FFX_FRAMEINTERPOLATION_ENABLE_JITTER_MOTION_VECTORS) ? FRAMEINTERPOLATION_SHADER_PERMUTATION_JITTER_MOTION_VECTORS : 0; + flags |= (contextFlags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) ? FRAMEINTERPOLATION_SHADER_PERMUTATION_DEPTH_INVERTED : 0; + flags |= (force64) ? FRAMEINTERPOLATION_SHADER_PERMUTATION_FORCE_WAVE64 : 0; + flags |= (fp16) ? 
FRAMEINTERPOLATION_SHADER_PERMUTATION_ALLOW_FP16 : 0; + return flags; +} + +static FfxErrorCode createPipelineStates(FfxFrameInterpolationContext_Private* context) +{ + FFX_ASSERT(context); + + FfxPipelineDescription pipelineDescription = {}; + pipelineDescription.contextFlags = context->contextDescription.flags; + pipelineDescription.stage = FFX_BIND_COMPUTE_SHADER_STAGE; + + // Samplers + pipelineDescription.samplerCount = 2; + FfxSamplerDescription samplerDescs[2] = { + {FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE}, + {FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE}}; + pipelineDescription.samplers = samplerDescs; + + // Root constants + pipelineDescription.rootConstantBufferCount = 2; + FfxRootConstantDescription rootConstantDescs[2] = + { + {sizeof(FrameInterpolationConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE}, + {sizeof(InpaintingPyramidConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE} + }; + pipelineDescription.rootConstants = rootConstantDescs; + + // Query device capabilities + FfxDeviceCapabilities capabilities; + context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities); + + // Setup a few options used to determine permutation flags + bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6; + bool supportedFP16 = capabilities.fp16Supported; + bool canForceWave64 = false; + bool useLut = false; + + const uint32_t waveLaneCountMin = capabilities.waveLaneCountMin; + const uint32_t waveLaneCountMax = capabilities.waveLaneCountMax; + if (waveLaneCountMin == 32 && waveLaneCountMax == 64) + { + useLut = true; + canForceWave64 = haveShaderModel66; + } + else + canForceWave64 = false; + + // Work out what permutation to load. 
+ uint32_t contextFlags = context->contextDescription.flags; + + // Set up pipeline descriptor (basically RootSignature and binding) + auto CreateComputePipeline = [&](FfxPass pass, const wchar_t* name, FfxPipelineState* pipeline) -> FfxErrorCode { + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, pipeline, context->effectContextId); + wcscpy_s(pipelineDescription.name, name); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline( + &context->contextDescription.backendInterface, + FFX_EFFECT_FRAMEINTERPOLATION, + pass, + getPipelinePermutationFlags(contextFlags, pass, supportedFP16, canForceWave64, useLut), + &pipelineDescription, + context->effectContextId, + pipeline)); + patchResourceBindings(pipeline); + + return FFX_OK; + }; + + auto CreateRasterPipeline = [&](FfxPass pass, const wchar_t* name, FfxPipelineState* pipeline) -> FfxErrorCode { + wcscpy_s(pipelineDescription.name, name); + pipelineDescription.stage = (FfxBindStage)(FFX_BIND_VERTEX_SHADER_STAGE | FFX_BIND_PIXEL_SHADER_STAGE); + pipelineDescription.backbufferFormat = context->contextDescription.backBufferFormat; + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline( + &context->contextDescription.backendInterface, + FFX_EFFECT_FRAMEINTERPOLATION, + pass, + getPipelinePermutationFlags(contextFlags, pass, supportedFP16, canForceWave64, useLut), + &pipelineDescription, + context->effectContextId, + pipeline)); + + return FFX_OK; + }; + + // Frame Interpolation Pipelines + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_AND_DILATE, L"RECONSTRUCT_AND_DILATE", &context->pipelineFiReconstructAndDilate); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_SETUP, L"SETUP", &context->pipelineFiSetup); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_PREV_DEPTH, L"RECONSTRUCT_PREV_DEPTH", &context->pipelineFiReconstructPreviousDepth); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_GAME_MOTION_VECTOR_FIELD, 
L"GAME_MOTION_VECTOR_FIELD", &context->pipelineFiGameMotionVectorField); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_OPTICAL_FLOW_VECTOR_FIELD, L"OPTICAL_FLOW_VECTOR_FIELD", &context->pipelineFiOpticalFlowVectorField); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_DISOCCLUSION_MASK, L"DISOCCLUSION_MASK", &context->pipelineFiDisocclusionMask); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_INTERPOLATION, L"INTERPOLATION", &context->pipelineFiScfi); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_INPAINTING_PYRAMID, L"INPAINTING_PYRAMID", &context->pipelineInpaintingPyramid); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_INPAINTING, L"INPAINTING", &context->pipelineInpainting); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_GAME_VECTOR_FIELD_INPAINTING_PYRAMID, L"GAME_VECTOR_FIELD_INPAINTING_PYRAMID", & context->pipelineGameVectorFieldInpaintingPyramid); + CreateComputePipeline(FFX_FRAMEINTERPOLATION_PASS_DEBUG_VIEW, L"DEBUG_VIEW", &context->pipelineDebugView); + + return FFX_OK; +} + + +// Format precision group for HUDless. +// Also format needs at least the 3 RGB channels to be valid +// int formats aren't accepted. 
+int GetFormatPrecisionGroup(FfxSurfaceFormat format) +{ + switch (format) + { + case FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS: + case FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT: + case FFX_SURFACE_FORMAT_R32G32B32_FLOAT: + return 0; + + case FFX_SURFACE_FORMAT_R16G16B16A16_TYPELESS: + case FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT: + return 1; + + case FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS: + case FFX_SURFACE_FORMAT_R8G8B8A8_UNORM: + case FFX_SURFACE_FORMAT_B8G8R8A8_TYPELESS: + case FFX_SURFACE_FORMAT_B8G8R8A8_UNORM: + return 2; + + case FFX_SURFACE_FORMAT_R8G8B8A8_SNORM: + return 3; + + case FFX_SURFACE_FORMAT_R8G8B8A8_SRGB: + case FFX_SURFACE_FORMAT_B8G8R8A8_SRGB: + return 4; + + case FFX_SURFACE_FORMAT_R11G11B10_FLOAT: + return 5; + + case FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS: + case FFX_SURFACE_FORMAT_R10G10B10A2_UNORM: + return 6; + + case FFX_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP: + return 7; + + // we don't accept the following formats + case FFX_SURFACE_FORMAT_R32G32B32A32_UINT: + case FFX_SURFACE_FORMAT_R32G32_FLOAT: + case FFX_SURFACE_FORMAT_R8_UINT: + case FFX_SURFACE_FORMAT_R32_UINT: + case FFX_SURFACE_FORMAT_R16G16_UINT: + case FFX_SURFACE_FORMAT_R16G16_SINT: + case FFX_SURFACE_FORMAT_R16G16_FLOAT: + case FFX_SURFACE_FORMAT_R16_FLOAT: + case FFX_SURFACE_FORMAT_R16_UINT: + case FFX_SURFACE_FORMAT_R16_UNORM: + case FFX_SURFACE_FORMAT_R16_SNORM: + case FFX_SURFACE_FORMAT_R8_UNORM: + case FFX_SURFACE_FORMAT_R8G8_UNORM: + case FFX_SURFACE_FORMAT_R8G8_UINT: + case FFX_SURFACE_FORMAT_R32_FLOAT: + case FFX_SURFACE_FORMAT_UNKNOWN: + default: + return -1; + } +} + +static FfxErrorCode frameinterpolationCreate(FfxFrameInterpolationContext_Private* context, const FfxFrameInterpolationContextDescription* contextDescription) +{ + FFX_ASSERT(context); + FFX_ASSERT(contextDescription); + + // validate compatibility between backbuffer and hudless formats + int backBufferGroup = GetFormatPrecisionGroup(contextDescription->backBufferFormat); + int previousInterpolationSourceGroup 
= GetFormatPrecisionGroup(contextDescription->previousInterpolationSourceFormat); + FFX_RETURN_ON_ERROR(backBufferGroup >= 0 && previousInterpolationSourceGroup >= 0 && backBufferGroup == previousInterpolationSourceGroup, FFX_ERROR_INVALID_ARGUMENT); + + // Setup the data for implementation. + memset(context, 0, sizeof(FfxFrameInterpolationContext_Private)); + context->device = contextDescription->backendInterface.device; + + memcpy(&context->contextDescription, contextDescription, sizeof(FfxFrameInterpolationContextDescription)); + + // Check version info - make sure we are linked with the right backend version + FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface); + FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION); + + // Create the context. + FfxErrorCode errorCode = context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FRAMEINTERPOLATION, nullptr, &context->effectContextId); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + // call out for device caps. 
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + // set defaults + context->firstExecution = true; + + context->asyncSupported = (contextDescription->flags & FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT) == FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT; + context->constants.maxRenderSize[0] = contextDescription->maxRenderSize.width; + context->constants.maxRenderSize[1] = contextDescription->maxRenderSize.height; + context->constants.displaySize[0] = contextDescription->displaySize.width; + context->constants.displaySize[1] = contextDescription->displaySize.height; + context->constants.displaySizeRcp[0] = 1.0f / contextDescription->displaySize.width; + context->constants.displaySizeRcp[1] = 1.0f / contextDescription->displaySize.height; + context->constants.interpolationRectBase[0] = 0; + context->constants.interpolationRectBase[1] = 0; + context->constants.interpolationRectSize[0] = contextDescription->displaySize.width; + context->constants.interpolationRectSize[1] = contextDescription->displaySize.height; + + // generate the data for the LUT. + const uint32_t lanczos2LutWidth = 128; + int16_t lanczos2Weights[lanczos2LutWidth] = { }; + + for (uint32_t currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; currentLanczosWidthIndex++) { + + const float x = 2.0f * currentLanczosWidthIndex / float(lanczos2LutWidth - 1); + const float y = lanczos2(x); + lanczos2Weights[currentLanczosWidthIndex] = int16_t(roundf(y * 32767.0f)); + } + + uint8_t defaultDistortionFieldData[2] = { 0, 0 }; + uint32_t atomicInitData[2] = { 0, 0 }; + float defaultExposure[] = { 0.0f, 0.0f }; + const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_TEXTURE1D_USAGE) ? 
FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D; + + // declare internal resources needed + const FfxInternalResourceDescription internalSurfaceDesc[] = { + + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, L"FI_ReconstructedDepthInterpolatedFrame", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, L"FI_GameMotionVectorFieldX", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, L"FI_GameMotionVectorFieldY", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID, L"FI_InpaintingPyramid", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width / 2, contextDescription->displaySize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, L"FI_Counters", FFX_RESOURCE_TYPE_BUFFER, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_UNKNOWN, 8, 4, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, // structured buffer contraining 2 UINT values + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, L"FI_OpticalFlowMotionVectorFieldX", 
FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, L"FI_OpticalFlowMotionVectorFieldY", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE, L"FI_PreviousInterpolationSouce", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + contextDescription->previousInterpolationSourceFormat, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK, L"FI_InpaintingMask", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, L"FI_DisocclusionMask", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8G8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD, L"FI_DefaultDistortionField", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R8G8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitBuffer(sizeof(defaultDistortionFieldData), defaultDistortionFieldData) }, + + }; + + 
// clear the SRV resources to NULL. + memset(context->srvResources, 0, sizeof(context->srvResources)); + + for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) { + + const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex]; + const FfxResourceDescription resourceDescription = {currentSurfaceDescription->type, + currentSurfaceDescription->format, + currentSurfaceDescription->width, + currentSurfaceDescription->height, + 1, + currentSurfaceDescription->mipCount, + currentSurfaceDescription->flags, + currentSurfaceDescription->usage}; + FfxResourceStates initialState = FFX_RESOURCE_STATE_UNORDERED_ACCESS; + if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) initialState = FFX_RESOURCE_STATE_COMPUTE_READ; + if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_RENDERTARGET) initialState = FFX_RESOURCE_STATE_RENDER_TARGET; + + const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->name, currentSurfaceDescription->id, currentSurfaceDescription->initData }; + + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[currentSurfaceDescription->id])); + } + + // copy resources to uavResrouces list + memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources)); + + // avoid compiling pipelines on first render + { + context->refreshPipelineStates = false; + errorCode = createPipelineStates(context); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + } + + return FFX_OK; +} + +static FfxErrorCode frameinterpolationRelease(FfxFrameInterpolationContext_Private* context) +{ + FFX_ASSERT(context); + + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, 
&context->pipelineFiReconstructAndDilate, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFiSetup, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFiReconstructPreviousDepth, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFiGameMotionVectorField, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFiOpticalFlowVectorField, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFiDisocclusionMask, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFiScfi, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineInpaintingPyramid, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineInpainting, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGameVectorFieldInpaintingPyramid, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineDebugView, context->effectContextId); + + // unregister resources not created internally + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + 
context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + + // Release the copy resources for those that had init data + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD], 
context->effectContextId); + + // release internal resources + for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) { + + ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->srvResources[currentResourceIndex], context->effectContextId); + } + + // Destroy the context + context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId); + + return FFX_OK; +} + +static void scheduleDispatch(FfxFrameInterpolationContext_Private* context, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) +{ + FfxComputeJobDescription jobDescriptor = {}; + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) + { + const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = context->srvResources[currentResourceId]; + jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif + } + + for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) { + + const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name); +#endif + + if (currentResourceId >= FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0 && currentResourceId <= 
FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_12) + { + const FfxResourceInternal currentResource = context->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID]; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = currentResourceId - FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0; + } + else + { + const FfxResourceInternal currentResource = context->uavResources[currentResourceId]; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0; + } + } + + jobDescriptor.dimensions[0] = dispatchX; + jobDescriptor.dimensions[1] = dispatchY; + jobDescriptor.dimensions[2] = 1; + jobDescriptor.pipeline = *pipeline; + + for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name); +#endif + jobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier]; + } + + for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavBufferCount; ++currentUnorderedAccessViewIndex) + { + const uint32_t currentResourceId = pipeline->uavBufferBindings[currentUnorderedAccessViewIndex].resourceIdentifier; + jobDescriptor.uavBuffers[currentUnorderedAccessViewIndex].resource = context->uavResources[currentResourceId]; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.uavBuffers[currentUnorderedAccessViewIndex].name, pipeline->uavBufferBindings[currentUnorderedAccessViewIndex].name); +#endif + } + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvBufferCount; 
++currentShaderResourceViewIndex) + { + const uint32_t currentResourceId = pipeline->srvBufferBindings[currentShaderResourceViewIndex].resourceIdentifier; + jobDescriptor.srvBuffers[currentShaderResourceViewIndex].resource = context->srvResources[currentResourceId]; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvBuffers[currentShaderResourceViewIndex].name, pipeline->srvBufferBindings[currentShaderResourceViewIndex].name); +#endif + } + + FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + wcscpy_s(dispatchJob.jobLabel, pipeline->name); + dispatchJob.computeJobDescriptor = jobDescriptor; + + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob); +} + +FFX_API FfxErrorCode ffxFrameInterpolationGetSharedResourceDescriptions(FfxFrameInterpolationContext* context, FfxFrameInterpolationSharedResourceDescriptions* SharedResources) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + SharedResources, + FFX_ERROR_INVALID_POINTER); + + FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context); + SharedResources->dilatedDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_FLOAT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FISHARED_DilatedDepth", FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + SharedResources->dilatedMotionVectors = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R16G16_FLOAT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, 
(FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FISHARED_DilatedVelocity", FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + SharedResources->reconstructedPrevNearestDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_UINT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV) }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FISHARED_ReconstructedPrevNearestDepth", FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + + return FFX_OK; +} + +FfxErrorCode ffxFrameInterpolationContextCreate(FfxFrameInterpolationContext* context, FfxFrameInterpolationContextDescription* contextDescription) +{ + // zero context memory + //memset(context, 0, sizeof(FfxFrameinterpolationContext)); + + // check pointers are valid. 
+ FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + contextDescription, + FFX_ERROR_INVALID_POINTER); + + // validate that all callbacks are set for the interface + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + + // if a scratch buffer is declared, then we must have a size + if (contextDescription->backendInterface.scratchBuffer) { + + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + } + + // ensure the context is large enough for the internal context. + FFX_STATIC_ASSERT(sizeof(FfxFrameInterpolationContext) >= sizeof(FfxFrameInterpolationContext_Private)); + + // create the context. 
+ FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context); + FfxErrorCode errorCode = frameinterpolationCreate(contextPrivate, contextDescription); + + return errorCode; +} + +FFX_API FfxErrorCode ffxFrameInterpolationContextGetGpuMemoryUsage(FfxFrameInterpolationContext* context, FfxEffectMemoryUsage* vramUsage) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER); + FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context); + + FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); + + FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage( + &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + +FFX_API FfxErrorCode ffxSharedContextGetGpuMemoryUsage(FfxInterface* backendInterfaceShared, FfxEffectMemoryUsage* vramUsage) +{ + FFX_RETURN_ON_ERROR(backendInterfaceShared, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER); + + FfxErrorCode errorCode = backendInterfaceShared->fpGetEffectGpuMemoryUsage( + backendInterfaceShared, 0, vramUsage); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + +FfxErrorCode ffxFrameInterpolationContextDestroy(FfxFrameInterpolationContext* context) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + + // destroy the context. 
+ FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context); + const FfxErrorCode errorCode = frameinterpolationRelease(contextPrivate); + + return errorCode; +} + +FfxErrorCode ffxFrameInterpolationContextEnqueueRefreshPipelineRequest(FfxFrameInterpolationContext* context) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + + FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)context; + contextPrivate->refreshPipelineStates = true; + + return FFX_OK; +} + +static void setupDeviceDepthToViewSpaceDepthParams(FfxFrameInterpolationContext_Private* context, const FfxFrameInterpolationRenderDescription* params, FrameInterpolationConstants* constants) +{ + const bool bInverted = (context->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED; + const bool bInfinite = (context->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE; + + // make sure it has no impact if near and far plane values are swapped in dispatch params + // the flags "inverted" and "infinite" will decide what transform to use + float fMin = FFX_MINIMUM(params->cameraNear, params->cameraFar); + float fMax = FFX_MAXIMUM(params->cameraNear, params->cameraFar); + + if (bInverted) { + float tmp = fMin; + fMin = fMax; + fMax = tmp; + } + + // a 0 0 0 x + // 0 b 0 0 y + // 0 0 c d z + // 0 0 e 0 1 + + const float fQ = fMax / (fMin - fMax); + const float d = -1.0f; // for clarity + + const float matrix_elem_c[2][2] = { + fQ, // non reversed, non infinite + -1.0f - FLT_EPSILON, // non reversed, infinite + fQ, // reversed, non infinite + 0.0f + FLT_EPSILON // reversed, infinite + }; + + const float matrix_elem_e[2][2] = { + fQ * fMin, // non reversed, non infinite + -fMin - FLT_EPSILON, // non reversed, infinite + fQ * fMin, // reversed, non infinite + fMax, // reversed, 
infinite + }; + + constants->deviceToViewDepth[0] = d * matrix_elem_c[bInverted][bInfinite]; + constants->deviceToViewDepth[1] = matrix_elem_e[bInverted][bInfinite] * params->viewSpaceToMetersFactor; + + // revert x and y coords + const float aspect = params->renderSize.width / float(params->renderSize.height); + const float cotHalfFovY = cosf(0.5f * params->cameraFovAngleVertical) / sinf(0.5f * params->cameraFovAngleVertical); + const float a = cotHalfFovY / aspect; + const float b = cotHalfFovY; + + constants->deviceToViewDepth[2] = (1.0f / a); + constants->deviceToViewDepth[3] = (1.0f / b); + +} + +FFX_API bool ffxFrameInterpolationResourceIsNull(FfxResource resource) +{ + return resource.resource == NULL; +} + +static const float debugBarColorSequence[] = { + 0.0f, 1.0f, 1.0f, // teal + 1.0f, 0.42f, 0.0f, // orange + 0.0f, 0.16f, 1.0f, // blue + 0.74f, 1.0f, 0.0f, // lime + 0.68f, 0.0f, 1.0f, // purple + 0.0f, 1.0f, 0.1f, // green + 1.0f, 1.0f, 0.48f // bright yellow +}; +const size_t debugBarColorSequenceLength = 7; + +static void fsr3FrameInterpolationDebugCheckPrepare(FfxFrameInterpolationContext_Private* context, const FfxFrameInterpolationPrepareDescription* params) +{ + + static const FfxFloat32x3 zeroVector3D = { 0.f,0.f,0.f }; + if ((memcmp(params->cameraPosition, zeroVector3D, sizeof(FfxFloat32x3)) == 0) && + (memcmp(params->cameraUp, zeroVector3D, sizeof(FfxFloat32x3)) == 0) && + (memcmp(params->cameraRight, zeroVector3D, sizeof(FfxFloat32x3)) == 0) && + (memcmp(params->cameraForward, zeroVector3D, sizeof(FfxFloat32x3)) == 0)) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"ffxDispatchDescFrameGenerationPrepareCameraInfo needs to be passed as linked struct. 
This is a required input to FSR3.1.4 and onwards for best quality."); + } +} + +FFX_API FfxErrorCode ffxFrameInterpolationPrepare(FfxFrameInterpolationContext* context, + const FfxFrameInterpolationPrepareDescription* params) +{ + FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context); + + if ((contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEBUG_CHECKING) == FFX_FRAMEINTERPOLATION_ENABLE_DEBUG_CHECKING) + { + fsr3FrameInterpolationDebugCheckPrepare(contextPrivate, params); + } + + contextPrivate->constants.renderSize[0] = params->renderSize.width; + contextPrivate->constants.renderSize[1] = params->renderSize.height; + contextPrivate->constants.jitter[0] = params->jitterOffset.x; + contextPrivate->constants.jitter[1] = params->jitterOffset.y; + + const int32_t* motionVectorsTargetSize = (contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) + ? contextPrivate->constants.displaySize + : contextPrivate->constants.renderSize; + contextPrivate->constants.motionVectorScale[0] = (params->motionVectorScale.x / motionVectorsTargetSize[0]); + contextPrivate->constants.motionVectorScale[1] = (params->motionVectorScale.y / motionVectorsTargetSize[1]); + + contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc( + &contextPrivate->contextDescription.backendInterface, + &contextPrivate->constants, + sizeof(contextPrivate->constants), + &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER]); + + FFX_ASSERT(!ffxFrameInterpolationResourceIsNull(params->depth)); + FFX_ASSERT(!ffxFrameInterpolationResourceIsNull(params->motionVectors)); + + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->depth, + contextPrivate->effectContextId, + &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH]); + + 
contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->motionVectors, + contextPrivate->effectContextId, + &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS]); + + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->dilatedDepth, + contextPrivate->effectContextId, + &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->dilatedMotionVectors, + contextPrivate->effectContextId, + &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->reconstructedPrevDepth, + contextPrivate->effectContextId, + &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME]); + + // clear estimated depth resources + { + FfxGpuJobDescription clearJob = {FFX_GPU_JOB_CLEAR_FLOAT}; + const bool bInverted = (contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED; + const float clearDepthValue[]{bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 
0.f : 1.f}; + memcpy(clearJob.clearJobDescriptor.color, clearDepthValue, 4 * sizeof(float)); + wcscpy_s(clearJob.jobLabel, L"Clear Reconstructed Previous Nearest Depth"); + clearJob.clearJobDescriptor.target = contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME]; + contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &clearJob); + } + + uint32_t renderDispatchSizeX = uint32_t(params->renderSize.width + 7) / 8; + uint32_t renderDispatchSizeY = uint32_t(params->renderSize.height + 7) / 8; + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiReconstructAndDilate, renderDispatchSizeX, renderDispatchSizeY); + + contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(&contextPrivate->contextDescription.backendInterface, params->commandList, contextPrivate->effectContextId); + + // release dynamic resources + contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, + params->commandList, + contextPrivate->effectContextId); + + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME] = {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL}; + + return FFX_OK; +} + +FFX_API FfxErrorCode ffxFrameInterpolationDispatch(FfxFrameInterpolationContext* context, 
const FfxFrameInterpolationDispatchDescription* params) +{ + FfxFrameInterpolationContext_Private* contextPrivate = (FfxFrameInterpolationContext_Private*)(context); + const FfxFrameInterpolationRenderDescription* renderDesc = &contextPrivate->renderDescription; + + if (contextPrivate->refreshPipelineStates) { + + createPipelineStates(contextPrivate); + contextPrivate->refreshPipelineStates = false; + } + + const bool bReset = (contextPrivate->dispatchCount == 0) || params->reset; + + FFX_ASSERT_MESSAGE(!contextPrivate->asyncSupported || bReset || (params->frameID > contextPrivate->previousFrameID), + "When async support is enabled, and the reset flag is not set, frame ID must increment in each dispatch"); + + // Detect disjoint frameID values + const bool bFrameID_Decreased = params->frameID < contextPrivate->previousFrameID; + const bool bFrameID_Skipped = (params->frameID - contextPrivate->previousFrameID) > 1; + const bool bDisjointFrameID = bFrameID_Decreased || bFrameID_Skipped; + contextPrivate->previousFrameID = params->frameID; + contextPrivate->dispatchCount++; + + contextPrivate->constants.renderSize[0] = params->renderSize.width; + contextPrivate->constants.renderSize[1] = params->renderSize.height; + contextPrivate->constants.displaySize[0] = params->displaySize.width; + contextPrivate->constants.displaySize[1] = params->displaySize.height; + contextPrivate->constants.displaySizeRcp[0] = 1.0f / params->displaySize.width; + contextPrivate->constants.displaySizeRcp[1] = 1.0f / params->displaySize.height; + contextPrivate->constants.upscalerTargetSize[0] = params->interpolationRect.width; + contextPrivate->constants.upscalerTargetSize[1] = params->interpolationRect.height; + contextPrivate->constants.Mode = 0; + contextPrivate->constants.Reset = bReset || bDisjointFrameID; + contextPrivate->constants.deltaTime = params->frameTimeDelta; + contextPrivate->constants.HUDLessAttachedFactor = params->currentBackBuffer_HUDLess.resource ? 
1 : 0; + + contextPrivate->constants.opticalFlowScale[0] = params->opticalFlowScale.x; + contextPrivate->constants.opticalFlowScale[1] = params->opticalFlowScale.y; + contextPrivate->constants.opticalFlowBlockSize = params->opticalFlowBlockSize;// displaySize.width / params->opticalFlowBufferSize.width; + contextPrivate->constants.dispatchFlags = params->flags; + + contextPrivate->constants.cameraNear = params->cameraNear; + contextPrivate->constants.cameraFar = params->cameraFar; + + contextPrivate->constants.interpolationRectBase[0] = params->interpolationRect.left; + contextPrivate->constants.interpolationRectBase[1] = params->interpolationRect.top; + contextPrivate->constants.interpolationRectSize[0] = params->interpolationRect.width; + contextPrivate->constants.interpolationRectSize[1] = params->interpolationRect.height; + + // Debug bar + static size_t dbgIdx = 0; + memcpy(contextPrivate->constants.debugBarColor, &debugBarColorSequence[dbgIdx * 3], 3 * sizeof(float)); + dbgIdx = (dbgIdx + 1) % debugBarColorSequenceLength; + + contextPrivate->constants.backBufferTransferFunction = params->backBufferTransferFunction; + contextPrivate->constants.minMaxLuminance[0] = params->minMaxLuminance[0]; + contextPrivate->constants.minMaxLuminance[1] = params->minMaxLuminance[1]; + + const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height; + const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2; + contextPrivate->constants.fTanHalfFOV = tanf(cameraAngleHorizontal * 0.5f); + + const bool bUseExternalDistortionFieldResource = !ffxFrameInterpolationResourceIsNull(params->distortionField); + if (bUseExternalDistortionFieldResource) + { + contextPrivate->constants.distortionFieldSize[0] = params->distortionField.description.width; + contextPrivate->constants.distortionFieldSize[1] = params->distortionField.description.height; + } + else + { + contextPrivate->constants.distortionFieldSize[0] = 1; 
+ contextPrivate->constants.distortionFieldSize[1] = 1; + } + + contextPrivate->renderDescription.cameraFar = params->cameraFar; + contextPrivate->renderDescription.cameraNear = params->cameraNear; + contextPrivate->renderDescription.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f; + contextPrivate->renderDescription.cameraFovAngleVertical = params->cameraFovAngleVertical; + contextPrivate->renderDescription.renderSize = params->renderSize; + contextPrivate->renderDescription.upscaleSize = params->displaySize; + setupDeviceDepthToViewSpaceDepthParams(contextPrivate, renderDesc, &contextPrivate->constants); + + contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc( + &contextPrivate->contextDescription.backendInterface, + &contextPrivate->constants, + sizeof(contextPrivate->constants), + &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER]); + + if (contextPrivate->constants.HUDLessAttachedFactor == 1) { + + FFX_ASSERT_MESSAGE(contextPrivate->contextDescription.previousInterpolationSourceFormat == params->currentBackBuffer_HUDLess.description.format, + "Dispatch FI param currentBackBuffer_HUDLess format and Create FG Context's hudlessBackBufferFormat have to be identical. 
Otherwise, CopyTextureRegion from FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE to FI_PreviousInterpolationSource would fail"); + + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->currentBackBuffer, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->currentBackBuffer_HUDLess, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE]); + } + else { + FFX_ASSERT_MESSAGE(contextPrivate->contextDescription.previousInterpolationSourceFormat == params->currentBackBuffer.description.format, + "Dispatch FI param currentBackBuffer format and Create FG Context's backBufferFormat have to be identical. This assert can also be triggered if create FG Context with optional hudlessBackBufferFormat that is different from backBufferFormat and Dispatch FI param's currentBackBuffer_HUDLess is null."); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->currentBackBuffer, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE]); + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER] = contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE]; + } + + if (!ffxFrameInterpolationResourceIsNull(params->dilatedDepth)) + { + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->dilatedDepth, + contextPrivate->effectContextId, + 
&contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH]); + } + if (!ffxFrameInterpolationResourceIsNull(params->dilatedMotionVectors)) + { + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->dilatedMotionVectors, + contextPrivate->effectContextId, + &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]); + } + if (!ffxFrameInterpolationResourceIsNull(params->reconstructedPrevDepth)) + { + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->reconstructedPrevDepth, + contextPrivate->effectContextId, + &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME]); + } + + // Register output as SRV and UAV + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->output, contextPrivate->effectContextId, &contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT]); + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT] = contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT]; + + // set optical flow buffers + if (params->opticalFlowScale.x > 0) + { + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->opticalFlowVector, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->opticalFlowSceneChangeDetection, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION]); + } + else + 
{ + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE] = {}; + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION] = {}; + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION] = {}; + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR] = {}; + } + + if (bUseExternalDistortionFieldResource) + { + contextPrivate->contextDescription.backendInterface.fpRegisterResource( + &contextPrivate->contextDescription.backendInterface, + ¶ms->distortionField, + contextPrivate->effectContextId, + &contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD]); + } + else + { + contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD] = contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD]; + } + + uint32_t displayDispatchSizeX = uint32_t(params->displaySize.width + 7) / 8; + uint32_t displayDispatchSizeY = uint32_t(params->displaySize.height + 7) / 8; + + uint32_t renderDispatchSizeX = uint32_t(params->renderSize.width + 7) / 8; + uint32_t renderDispatchSizeY = uint32_t(params->renderSize.height + 7) / 8; + + uint32_t opticalFlowDispatchSizeX = uint32_t(params->displaySize.width / float(params->opticalFlowBlockSize) + 7) / 8; + uint32_t opticalFlowDispatchSizeY = uint32_t(params->displaySize.height / float(params->opticalFlowBlockSize) + 7) / 8; + + const bool bExecutePreparationPasses = (false == contextPrivate->constants.Reset); + + // Schedule work for the interpolation command list + { + FfxResourceInternal aliasableResources[] = { + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME], + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X], + 
contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y], + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID], + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X], + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y], + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK], + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK] + }; + for (int i = 0; i < _countof(aliasableResources); ++i) + { + FfxGpuJobDescription discardJob = {FFX_GPU_JOB_DISCARD}; + discardJob.discardJobDescriptor.target = aliasableResources[i]; + contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &discardJob); + } + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiSetup, renderDispatchSizeX, renderDispatchSizeY); + + // game vector field inpainting pyramid + auto scheduleDispatchGameVectorFieldInpaintingPyramid = [&]() { + // Auto exposure + uint32_t dispatchThreadGroupCountXY[2]; + uint32_t workGroupOffset[2]; + uint32_t numWorkGroupsAndMips[2]; + uint32_t rectInfo[4] = {0, 0, params->renderSize.width, params->renderSize.height}; + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); + + // downsample + contextPrivate->inpaintingPyramidContants.numworkGroups = numWorkGroupsAndMips[0]; + contextPrivate->inpaintingPyramidContants.mips = numWorkGroupsAndMips[1]; + contextPrivate->inpaintingPyramidContants.workGroupOffset[0] = workGroupOffset[0]; + contextPrivate->inpaintingPyramidContants.workGroupOffset[1] = workGroupOffset[1]; + + contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc( + &contextPrivate->contextDescription.backendInterface, + 
&contextPrivate->inpaintingPyramidContants, + sizeof(contextPrivate->inpaintingPyramidContants), + &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER]); + + scheduleDispatch( + contextPrivate, &contextPrivate->pipelineGameVectorFieldInpaintingPyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]); + }; + + // only execute FG data preparation passes when reset wasnt triggered + if (bExecutePreparationPasses) + { + // clear estimated depth resources + { + FfxGpuJobDescription clearJob = {FFX_GPU_JOB_CLEAR_FLOAT}; + + const bool bInverted = + (contextPrivate->contextDescription.flags & FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED) == FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED; + const float clearDepthValue[]{bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f}; + memcpy(clearJob.clearJobDescriptor.color, clearDepthValue, 4 * sizeof(float)); + + wcscpy_s(clearJob.jobLabel, L"Clear Reconstructed Depth Interpolated Frame"); + clearJob.clearJobDescriptor.target = + contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME]; + contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &clearJob); + } + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiReconstructPreviousDepth, renderDispatchSizeX, renderDispatchSizeY); + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiGameMotionVectorField, renderDispatchSizeX, renderDispatchSizeY); + + scheduleDispatchGameVectorFieldInpaintingPyramid(); + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiOpticalFlowVectorField, opticalFlowDispatchSizeX, opticalFlowDispatchSizeY); + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiDisocclusionMask, renderDispatchSizeX, renderDispatchSizeY); + } + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineFiScfi, 
displayDispatchSizeX, displayDispatchSizeY); + + // inpainting pyramid + { + // Auto exposure + uint32_t dispatchThreadGroupCountXY[2]; + uint32_t workGroupOffset[2]; + uint32_t numWorkGroupsAndMips[2]; + uint32_t rectInfo[4] = { 0, 0, params->displaySize.width, params->displaySize.height }; + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); + + // downsample + contextPrivate->inpaintingPyramidContants.numworkGroups = numWorkGroupsAndMips[0]; + contextPrivate->inpaintingPyramidContants.mips = numWorkGroupsAndMips[1]; + contextPrivate->inpaintingPyramidContants.workGroupOffset[0] = workGroupOffset[0]; + contextPrivate->inpaintingPyramidContants.workGroupOffset[1] = workGroupOffset[1]; + + contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc( + &contextPrivate->contextDescription.backendInterface, + &contextPrivate->inpaintingPyramidContants, + sizeof(contextPrivate->inpaintingPyramidContants), + &contextPrivate->constantBuffers[FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER]); + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineInpaintingPyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]); + } + + scheduleDispatch(contextPrivate, &contextPrivate->pipelineInpainting, displayDispatchSizeX, displayDispatchSizeY); + + if (params->flags & FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW) + { + scheduleDispatchGameVectorFieldInpaintingPyramid(); + scheduleDispatch(contextPrivate, &contextPrivate->pipelineDebugView, displayDispatchSizeX, displayDispatchSizeY); + } + + // store current buffer + { + FfxGpuJobDescription copyJobs[] = { {FFX_GPU_JOB_COPY} }; + FfxResourceInternal copySources[_countof(copyJobs)] = { contextPrivate->srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE] }; + FfxResourceInternal destSources[_countof(copyJobs)] = { 
contextPrivate->uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE] }; + + for (int i = 0; i < _countof(copyJobs); ++i) + { + copyJobs[i].copyJobDescriptor.src = copySources[i]; + copyJobs[i].copyJobDescriptor.dst = destSources[i]; + contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &copyJobs[i]); + } + } + + // declare internal resources needed + struct FfxInternalResourceStates + { + FfxUInt32 id; + FfxResourceUsage usage; + }; + const FfxInternalResourceStates internalSurfaceDesc[] = { + + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE, FFX_RESOURCE_USAGE_UAV}, + {FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK, FFX_RESOURCE_USAGE_UAV}, + }; + + for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) { + + const FfxInternalResourceStates* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex]; + FfxResourceStates initialState = FFX_RESOURCE_STATE_UNORDERED_ACCESS; + if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) initialState = FFX_RESOURCE_STATE_COMPUTE_READ; + if (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_RENDERTARGET) 
initialState = FFX_RESOURCE_STATE_RENDER_TARGET; + + FfxGpuJobDescription barrier = {FFX_GPU_JOB_BARRIER}; + barrier.barrierDescriptor.resource = contextPrivate->srvResources[currentSurfaceDescription->id]; + barrier.barrierDescriptor.subResourceID = 0; + barrier.barrierDescriptor.newState = (currentSurfaceDescription->id == FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS) ? FFX_RESOURCE_STATE_COPY_DEST : initialState; + barrier.barrierDescriptor.barrierType = FFX_BARRIER_TYPE_TRANSITION; + contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &barrier); + } + + // schedule optical flow and frame interpolation + contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(&contextPrivate->contextDescription.backendInterface, params->commandList, contextPrivate->effectContextId); + } + + // release dynamic resources + contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, params->commandList, contextPrivate->effectContextId); + + return FFX_OK; +} + +FFX_API FfxVersionNumber ffxFrameInterpolationGetEffectVersion() +{ + return FFX_SDK_MAKE_VERSION(FFX_FRAMEINTERPOLATION_VERSION_MAJOR, FFX_FRAMEINTERPOLATION_VERSION_MINOR, FFX_FRAMEINTERPOLATION_VERSION_PATCH); +} + +FFX_API FfxErrorCode ffxFrameInterpolationSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel) +{ + ffxSetPrintMessageCallback(fpMessage, debugLevel); + return FFX_OK; +} diff --git a/thirdparty/amd-ffx/ffx_frameinterpolation.h b/thirdparty/amd-ffx/ffx_frameinterpolation.h new file mode 100644 index 000000000000..ed19525e07b5 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_frameinterpolation.h @@ -0,0 +1,316 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// @defgroup FRAMEINTERPOLATION + +#pragma once + +// Include the interface for the backend of the Frameinterpolation API. +#include "ffx_interface.h" + +/// FidelityFX Frameinterpolation major version. +/// +/// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION +#define FFX_FRAMEINTERPOLATION_VERSION_MAJOR (1) + +/// FidelityFX Frameinterpolation minor version. +/// +/// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION +#define FFX_FRAMEINTERPOLATION_VERSION_MINOR (1) + +/// FidelityFX Frameinterpolation patch version. 
+/// +/// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION +#define FFX_FRAMEINTERPOLATION_VERSION_PATCH (3) + +/// FidelityFX Frame Interpolation context count +/// +/// Defines the number of internal effect contexts required by Frame Interpolation +/// +/// @ingroup ffxFrameInterpolation +#define FFX_FRAMEINTERPOLATION_CONTEXT_COUNT (1) + +/// The size of the context specified in 32bit values. +/// +/// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION +#define FFX_FRAMEINTERPOLATION_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE) + +#if defined(__cplusplus) +extern "C" { +#endif // #if defined(__cplusplus) + +/// An enumeration of all the passes which constitute the FSR3 algorithm. +/// +/// FSR3 is implemented as a composite of several compute passes each +/// computing a key part of the final result. Each call to the +/// FfxFsr3ScheduleGpuJobFunc callback function will +/// correspond to a single pass included in FfxFsr3Pass. For a +/// more comprehensive description of each pass, please refer to the FSR3 +/// reference documentation. +/// +/// Please note in some cases e.g.: FFX_FSR3_PASS_ACCUMULATE +/// and FFX_FSR3_PASS_ACCUMULATE_SHARPEN either one pass or the +/// other will be used (they are mutually exclusive). The choice of which will +/// depend on the way the FfxFsr3Context is created and the +/// precise contents of FfxFsr3DispatchParamters each time a call +/// is made to ffxFsr3ContextDispatch. 
+/// +/// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION +typedef enum FfxFrameInterpolationPass +{ + FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_AND_DILATE, + FFX_FRAMEINTERPOLATION_PASS_SETUP, + FFX_FRAMEINTERPOLATION_PASS_RECONSTRUCT_PREV_DEPTH, + FFX_FRAMEINTERPOLATION_PASS_GAME_MOTION_VECTOR_FIELD, + FFX_FRAMEINTERPOLATION_PASS_OPTICAL_FLOW_VECTOR_FIELD, + FFX_FRAMEINTERPOLATION_PASS_DISOCCLUSION_MASK, + FFX_FRAMEINTERPOLATION_PASS_INTERPOLATION, + FFX_FRAMEINTERPOLATION_PASS_INPAINTING_PYRAMID, + FFX_FRAMEINTERPOLATION_PASS_INPAINTING, + FFX_FRAMEINTERPOLATION_PASS_GAME_VECTOR_FIELD_INPAINTING_PYRAMID, + FFX_FRAMEINTERPOLATION_PASS_DEBUG_VIEW, + FFX_FRAMEINTERPOLATION_PASS_COUNT ///< The number of passes performed by FrameInterpolation. +} FfxFrameInterpolationPass; + +// forward declarations +struct FfxFrameInterpolationContext; + +/// An enumeration of bit flags used when creating a +/// FfxFrameInterpolationContext. See FfxFrameInterpolationContextDescription. +/// +/// @ingroup FRAMEINTERPOLATIONFRAMEINTERPOLATION +typedef enum FfxFrameInterpolationInitializationFlagBits { + + FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED = (1<<0), ///< A bit indicating that the input depth buffer data provided is inverted [1..0]. + FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE = (1<<1), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane. + FFX_FRAMEINTERPOLATION_ENABLE_TEXTURE1D_USAGE = (1<<2), ///< A bit indicating that the backend should use 1D textures. + FFX_FRAMEINTERPOLATION_ENABLE_HDR_COLOR_INPUT = (1<<3), ///< A bit indicating that HDR values are present in the imaging pipeline. + FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<4), ///< A bit indicating if the motion vectors are rendered at display resolution. 
+ FFX_FRAMEINTERPOLATION_ENABLE_JITTER_MOTION_VECTORS = (1<<5), + FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT = (1<<6), + FFX_FRAMEINTERPOLATION_ENABLE_DEBUG_CHECKING = (1<<7), ///< A bit indicating that the runtime should check some API values and report issues. +} FfxFrameInterpolationInitializationFlagBits; + +/// A structure encapsulating the parameters required to initialize +/// FidelityFX Frameinterpolation. +/// +/// @ingroup FRAMEINTERPOLATION +typedef struct FfxFrameInterpolationContextDescription { + uint32_t flags; ///< A collection of FfxFrameInterpolationInitializationFlagBits. + FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. + FfxDimensions2D displaySize; ///< The size of the presentation resolution + FfxSurfaceFormat backBufferFormat; ///< the format of the backbuffer + FfxSurfaceFormat previousInterpolationSourceFormat; ///< the format of the texture that will store the interpolation source for the next frame. Can be different than the backbuffer one, especially when using hudless + FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK +} FfxFrameInterpolationContextDescription; + +/// A structure encapsulating the resource descriptions for shared resources for this effect. +/// +/// @ingroup FRAMEINTERPOLATION +typedef struct FfxFrameInterpolationSharedResourceDescriptions +{ + FfxCreateResourceDescription reconstructedPrevNearestDepth; ///< The FfxCreateResourceDescription for allocating the reconstructedPrevNearestDepth shared resource. + FfxCreateResourceDescription dilatedDepth; ///< The FfxCreateResourceDescription for allocating the dilatedDepth shared resource. + FfxCreateResourceDescription dilatedMotionVectors; ///< The FfxCreateResourceDescription for allocating the dilatedMotionVectors shared resource. +} FfxFrameInterpolationSharedResourceDescriptions; + +/// A structure encapsulating the FidelityFX Super Resolution 2 context. 
+/// +/// This sets up an object which contains all persistent internal data and +/// resources that are required by FSR3. +/// +/// The FfxFsr3Context object should have a lifetime matching +/// your use of FSR3. Before destroying the FSR3 context care should be taken +/// to ensure the GPU is not accessing the resources created or used by FSR3. +/// It is therefore recommended that the GPU is idle before destroying the +/// FSR3 context. +/// +/// @ingroup FRAMEINTERPOLATION +typedef struct FfxFrameInterpolationContext +{ + uint32_t data[FFX_FRAMEINTERPOLATION_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context. +} FfxFrameInterpolationContext; + + +/// Create a FidelityFX Super Resolution 2 context from the parameters +/// programmed to the FfxFsr3CreateParams structure. +/// +/// The context structure is the main object used to interact with the FSR3 +/// API, and is responsible for the management of the internal resources used +/// by the FSR3 algorithm. When this API is called, multiple calls will be +/// made via the pointers contained in the callbacks structure. +/// These callbacks will attempt to retreive the device capabilities, and +/// create the internal resources, and pipelines required by FSR3's +/// frame-to-frame function. Depending on the precise configuration used when +/// creating the FfxFsr3Context a different set of resources and +/// pipelines might be requested via the callback functions. +/// +/// The flags included in the flags field of +/// FfxFsr3Context how match the configuration of your +/// application as well as the intended use of FSR3. It is important that these +/// flags are set correctly (as well as a correct programmed +/// FfxFsr3DispatchDescription) to ensure correct operation. It is +/// recommended to consult the overview documentation for further details on +/// how FSR3 should be integerated into an application. 
+/// +/// When the FfxFsr3Context is created, you should use the +/// ffxFsr3ContextDispatch function each frame where FSR3 +/// upscaling should be applied. See the documentation of +/// ffxFsr3ContextDispatch for more details. +/// +/// The FfxFsr3Context should be destroyed when use of it is +/// completed, typically when an application is unloaded or FSR3 upscaling is +/// disabled by a user. To destroy the FSR3 context you should call +/// ffxFsr3ContextDestroy. +/// +/// @param [out] context A pointer to a FfxFsr3Context structure to populate. +/// @param [in] contextDescription A pointer to a FfxFsr3ContextDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or contextDescription was NULL. +/// @retval +/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr3ContextDescription.callbacks was not fully specified. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup FRAMEINTERPOLATION +FFX_API FfxErrorCode ffxFrameInterpolationContextCreate(FfxFrameInterpolationContext* context, FfxFrameInterpolationContextDescription* contextDescription); + +FFX_API FfxErrorCode ffxFrameInterpolationContextGetGpuMemoryUsage(FfxFrameInterpolationContext* pContext, FfxEffectMemoryUsage* vramUsage); + +FFX_API FfxErrorCode ffxFrameInterpolationGetSharedResourceDescriptions(FfxFrameInterpolationContext* pContext, FfxFrameInterpolationSharedResourceDescriptions* SharedResources); + +FFX_API FfxErrorCode ffxSharedContextGetGpuMemoryUsage(FfxInterface* backendInterfaceShared, FfxEffectMemoryUsage* vramUsage); + +typedef struct FfxFrameInterpolationPrepareDescription +{ + uint32_t flags; ///< combination of FfxFrameInterpolationDispatchFlags + FfxCommandList commandList; ///< The FfxCommandList to record frame interpolation commands into. 
+ FfxDimensions2D renderSize; ///< The dimensions used to render game content, dilatedDepth, dilatedMotionVectors are expected to be of ths size. + FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera. jitter; + FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors. motionVectorScale; + + float frameTimeDelta; + float cameraNear; + float cameraFar; + float viewSpaceToMetersFactor; + float cameraFovAngleVertical; + + FfxResource depth; ///< The depth buffer data + FfxResource motionVectors; ///< The motion vector data + uint64_t frameID; + + FfxResource dilatedDepth; ///< The dilated depth buffer data + FfxResource dilatedMotionVectors; ///< The dilated motion vector data + FfxResource reconstructedPrevDepth; ///< The reconstructed depth buffer data + + FfxFloat32x3 cameraPosition; ///< The camera position in world space + FfxFloat32x3 cameraUp; ///< The camera up normalized vector in world space. + FfxFloat32x3 cameraRight; ///< The camera right normalized vector in world space. + FfxFloat32x3 cameraForward; ///< The camera forward normalized vector in world space. + +} FfxFrameInterpolationPrepareDescription; + +FFX_API FfxErrorCode ffxFrameInterpolationPrepare(FfxFrameInterpolationContext* context, const FfxFrameInterpolationPrepareDescription* params); + +typedef enum FfxFrameInterpolationDispatchFlags +{ + FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES = (1 << 0), ///< A bit indicating that the debug tear lines will be drawn to the interpolated output. + FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS = (1 << 1), ///< A bit indicating that the debug reset indicators will be drawn to the generated output. + FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW = (1 << 2), ///< A bit indicating that the interpolated output resource will contain debug views with relevant information. 
+ FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_PACING_LINES = (1 << 3), ///< A bit indicating that the debug pacing lines will be drawn to the generated output. + FFX_FRAMEINTERPOLATION_DISPATCH_RESERVED_1 = (1 << 4), + FFX_FRAMEINTERPOLATION_DISPATCH_RESERVED_2 = (1 << 5), +} FfxFrameInterpolationDispatchFlags; + +typedef struct FfxFrameInterpolationDispatchDescription { + + uint32_t flags; ///< combination of FfxFrameInterpolationDispatchFlags + FfxCommandList commandList; ///< The FfxCommandList to record frame interpolation commands into. + FfxDimensions2D displaySize; ///< The destination output dimensions + FfxDimensions2D renderSize; ///< The dimensions used to render game content, dilatedDepth, dilatedMotionVectors are expected to be of ths size. + FfxResource currentBackBuffer; ///< The current presentation color, if currentBackBuffer_HUDLess is not used, this will be used as interpolation source data. + FfxResource currentBackBuffer_HUDLess; ///< The current presentation color without HUD content, when use it will be used as interpolation source data. + FfxResource output; ///< The output resource where to store the interpolated result. + + FfxRect2D interpolationRect; ///< The area of the backbuffer that should be used for interpolation in case only a part of the screen is used e.g. due to movie bars + + FfxResource opticalFlowVector; ///< The optical flow motion vectors (see example computation in the FfxOpticalFlow effect) + FfxResource opticalFlowSceneChangeDetection; ///< The optical flow scene change detection data + FfxDimensions2D opticalFlowBufferSize; ///< The optical flow motion vector resource dimensions + FfxFloatCoords2D opticalFlowScale; ///< The optical flow motion vector scale factor, used to scale resoure values into [0.0,1.0] range. + int opticalFlowBlockSize; ///< The optical flow block dimension size + + float cameraNear; ///< The distance to the near plane of the camera. 
+ float cameraFar; ///< The distance to the far plane of the camera. This is used only used in case of non infinite depth. + float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians). + float viewSpaceToMetersFactor; ///< The unit to scale view space coordinates to meters. + + float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds). + bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously. + + FfxBackbufferTransferFunction backBufferTransferFunction; ///< The transfer function use to convert interpolation source color data to linear RGB. + float minMaxLuminance[2]; ///< Min and max luminance values, used when converting HDR colors to linear RGB + uint64_t frameID; ///< Identifier used to select internal resources when async support is enabled. Must increment by exactly one (1) for each frame. Any non-exactly-one difference will reset the frame generation logic. + + FfxResource dilatedDepth; ///< The dilated depth buffer data + FfxResource dilatedMotionVectors; ///< The dilated motion vector data + FfxResource reconstructedPrevDepth; ///< The reconstructed depth buffer data + + FfxResource distortionField; ///< A resource containing distortion offset data used when distortion post effects are enabled. +} FfxFrameInterpolationDispatchDescription; + +FFX_API FfxErrorCode ffxFrameInterpolationDispatch(FfxFrameInterpolationContext* context, const FfxFrameInterpolationDispatchDescription* params); + +/// Destroy the FidelityFX Super Resolution context. +/// +/// @param [out] context A pointer to a FfxFsr3Context structure to destroy. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context was NULL. 
+/// +/// @ingroup FRAMEINTERPOLATION +FFX_API FfxErrorCode ffxFrameInterpolationContextDestroy(FfxFrameInterpolationContext* context); + +/// Queries the effect version number. +/// +/// @returns +/// The SDK version the effect was built with. +/// +/// @ingroup FRAMEINTERPOLATION +FFX_API FfxVersionNumber ffxFrameInterpolationGetEffectVersion(); + +/// Set global debug message settings +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// +/// @ingroup FRAMEINTERPOLATION +FFX_API FfxErrorCode ffxFrameInterpolationSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel); + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-ffx/ffx_frameinterpolation_private.h b/thirdparty/amd-ffx/ffx_frameinterpolation_private.h new file mode 100644 index 000000000000..a338f7f1924f --- /dev/null +++ b/thirdparty/amd-ffx/ffx_frameinterpolation_private.h @@ -0,0 +1,147 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "gpu/frameinterpolation/ffx_frameinterpolation_resources.h" + +/// An enumeration of all the permutations that can be passed to the FSR3 algorithm. +/// +/// FSR3 features are organized through a set of pre-defined compile +/// permutation options that need to be specified. Which shader blob +/// is returned for pipeline creation will be determined by what combination +/// of shader permutations are enabled. +/// +/// @ingroup FRAMEINTERPOLATION +typedef enum FrameInterpolationShaderPermutationOptions +{ + FRAMEINTERPOLATION_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS = (1 << 0), + FRAMEINTERPOLATION_SHADER_PERMUTATION_JITTER_MOTION_VECTORS = (1 << 1), + FRAMEINTERPOLATION_SHADER_PERMUTATION_DEPTH_INVERTED = (1 << 2), ///< Indicates input resources were generated with inverted depth + FRAMEINTERPOLATION_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 3), ///< doesn't map to a define, selects different table + FRAMEINTERPOLATION_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 4), ///< Enables fast math computations where possible +} FrameInterpolationShaderPermutationOptions; + +typedef struct FrameInterpolationConstants +{ + int32_t renderSize[2]; + int32_t displaySize[2]; + + float displaySizeRcp[2]; + float cameraNear; + float cameraFar; + + int32_t upscalerTargetSize[2]; // how is that different from display size? 
+ int Mode; + int Reset; + + float deviceToViewDepth[4]; + + float deltaTime; + int HUDLessAttachedFactor; + int32_t distortionFieldSize[2]; + + float opticalFlowScale[2]; + int32_t opticalFlowBlockSize; + uint32_t dispatchFlags; + + int32_t maxRenderSize[2]; + int opticalFlowHalfResMode; + int numInstances; + + int32_t interpolationRectBase[2]; + int32_t interpolationRectSize[2]; + + float debugBarColor[3]; + uint32_t backBufferTransferFunction; + + float minMaxLuminance[2]; + float fTanHalfFOV; + float _pad1; + + float jitter[2]; + float motionVectorScale[2]; +} FrameInterpolationConstants; + +typedef struct InpaintingPyramidConstants { + + uint32_t mips; + uint32_t numworkGroups; + uint32_t workGroupOffset[2]; +} InpaintingPyramidConstants; + +struct FfxDeviceCapabilities; +struct FfxPipelineState; +struct FfxResource; + +typedef struct FfxFrameInterpolationRenderDescription +{ + FfxDimensions2D renderSize; + FfxDimensions2D upscaleSize; + + float cameraNear; + float cameraFar; + float cameraFovAngleVertical; + float viewSpaceToMetersFactor; + + FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors. +} FfxFrameInterpolationRenderDescription; + +// FfxFsr3Context_Private +// The private implementation of the FSR3 context. 
+typedef struct FfxFrameInterpolationContext_Private { + + FfxFrameInterpolationContextDescription contextDescription; + FfxUInt32 effectContextId; + FfxFrameInterpolationRenderDescription renderDescription; + FrameInterpolationConstants constants; + InpaintingPyramidConstants inpaintingPyramidContants; + FfxDevice device; + FfxDeviceCapabilities deviceCapabilities; + + // FrameInterpolation Pipelines + FfxPipelineState pipelineFiReconstructAndDilate; + FfxPipelineState pipelineFiSetup; + FfxPipelineState pipelineFiReconstructPreviousDepth; + FfxPipelineState pipelineFiGameMotionVectorField; + FfxPipelineState pipelineFiOpticalFlowVectorField; + FfxPipelineState pipelineFiDisocclusionMask; + FfxPipelineState pipelineFiScfi; + FfxPipelineState pipelineInpaintingPyramid; + FfxPipelineState pipelineInpainting; + FfxPipelineState pipelineGameVectorFieldInpaintingPyramid; + FfxPipelineState pipelineDebugView; + + FfxConstantBuffer constantBuffers[FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_COUNT]; + + // 2 arrays of resources, as e.g. FFX_FSR3_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV + FfxResourceInternal srvResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT]; + FfxResourceInternal uavResources[FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT]; + + bool firstExecution; + bool refreshPipelineStates; + + bool asyncSupported; + uint64_t previousFrameID; + uint64_t dispatchCount; + +} FfxFrameInterpolationContext_Private; diff --git a/thirdparty/amd-ffx/ffx_fsr1.cpp b/thirdparty/amd-ffx/ffx_fsr1.cpp new file mode 100644 index 000000000000..e638b4814c1c --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr1.cpp @@ -0,0 +1,515 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <string.h> // for memset +#include <stdlib.h> // for _countof +#include <cmath> // for fabs, abs, sinf, sqrt, etc. 
+ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wsign-compare" +#pragma clang diagnostic ignored "-Wunused-function" +#endif + +#ifdef _MSC_VER +#pragma warning(disable : 4505) +#endif + +#include "ffx_fsr1.h" +#include "gpu/ffx_core.h" +#include "gpu/fsr1/ffx_fsr1.h" +#include "ffx_object_management.h" + +#include "ffx_fsr1_private.h" + +// lists to map shader resource bindpoint name to resource identifier +typedef struct ResourceBinding +{ + uint32_t index; + wchar_t name[64]; +}ResourceBinding; + +static const ResourceBinding srvTextureBindingTable[] = +{ + {FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color"}, + {FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"r_internal_upscaled_color"}, + {FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"r_upscaled_output" }, +}; + +static const ResourceBinding uavTextureBindingTable[] = +{ + {FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR, L"rw_input_color"}, + {FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"rw_internal_upscaled_color"}, + {FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"rw_upscaled_output"}, +}; + +static const ResourceBinding cbResourceBindingTable[] = +{ + {FFX_FSR1_CONSTANTBUFFER_IDENTIFIER_FSR1, L"cbFSR1"}, +}; + +static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) +{ + for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(srvTextureBindingTable); ++mapIndex) + { + if (0 == wcscmp(srvTextureBindingTable[mapIndex].name, inoutPipeline->srvTextureBindings[srvIndex].name)) + break; + } + if (mapIndex == _countof(srvTextureBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = srvTextureBindingTable[mapIndex].index; + } + + for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(uavTextureBindingTable); 
++mapIndex) + { + if (0 == wcscmp(uavTextureBindingTable[mapIndex].name, inoutPipeline->uavTextureBindings[uavIndex].name)) + break; + } + if (mapIndex == _countof(uavTextureBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = uavTextureBindingTable[mapIndex].index; + } + + for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex) + { + if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->constantBufferBindings[cbIndex].name)) + break; + } + if (mapIndex == _countof(cbResourceBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = cbResourceBindingTable[mapIndex].index; + } + + return FFX_OK; +} + +static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxFsr1Pass passId, bool fp16, bool force64) +{ + // work out what permutation to load. + uint32_t flags = 0; + flags |= (contextFlags & FFX_FSR1_RCAS_PASSTHROUGH_ALPHA) ? FSR1_SHADER_PERMUTATION_RCAS_PASSTHROUGH_ALPHA : 0; + flags |= (contextFlags & FFX_FSR1_ENABLE_SRGB_CONVERSIONS) ? FSR1_SHADER_PERMUTATION_SRGB_CONVERSIONS : 0; + flags |= (passId != FFX_FSR1_PASS_EASU) ? FSR1_SHADER_PERMUTATION_APPLY_RCAS : 0; + flags |= (force64) ? FSR1_SHADER_PERMUTATION_FORCE_WAVE64 : 0; +#if defined(_GAMING_XBOX_SCARLETT) + // Never got reports about NaNs on Xbox + flags |= (fp16) ? FSR1_SHADER_PERMUTATION_ALLOW_FP16 : 0; +#else + // Some NaNs have been observed on other hardware during Rcas with FP16 + flags |= (fp16 && (passId != FFX_FSR1_PASS_RCAS)) ? 
FSR1_SHADER_PERMUTATION_ALLOW_FP16 : 0; +#endif + return flags; +} + +static FfxErrorCode createPipelineStates(FfxFsr1Context_Private* context) +{ + FFX_ASSERT(context); + + FfxPipelineDescription pipelineDescription = {}; + pipelineDescription.contextFlags = context->contextDescription.flags; + + // Samplers + pipelineDescription.samplerCount = 1; + FfxSamplerDescription samplerDesc = { FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE }; + pipelineDescription.samplers = &samplerDesc; + + // Root constants + pipelineDescription.rootConstantBufferCount = 1; + FfxRootConstantDescription rootConstantDesc = { sizeof(Fsr1Constants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE }; + pipelineDescription.rootConstants = &rootConstantDesc; + + // Query device capabilities + FfxDeviceCapabilities capabilities; + context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities); + + // Setup a few options used to determine permutation flags + bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6; + bool supportedFP16 = capabilities.fp16Supported; + bool canForceWave64 = false; + + const uint32_t waveLaneCountMin = capabilities.waveLaneCountMin; + const uint32_t waveLaneCountMax = capabilities.waveLaneCountMax; + if (waveLaneCountMin <= 64 && waveLaneCountMax >= 64) + canForceWave64 = haveShaderModel66; + else + canForceWave64 = false; + + // Work out what permutation to load. 
+ uint32_t contextFlags = context->contextDescription.flags; + + // Set up pipeline descriptors (basically RootSignature and binding) + wcscpy_s(pipelineDescription.name, L"FSR1-EASU"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR1, FFX_FSR1_PASS_EASU, + getPipelinePermutationFlags(contextFlags, FFX_FSR1_PASS_EASU, supportedFP16, canForceWave64), + &pipelineDescription, context->effectContextId, &context->pipelineEASU)); + wcscpy_s(pipelineDescription.name, L"FSR1-EASU_RCAS"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR1, FFX_FSR1_PASS_EASU_RCAS, + getPipelinePermutationFlags(contextFlags, FFX_FSR1_PASS_EASU_RCAS, supportedFP16, canForceWave64), + &pipelineDescription, context->effectContextId, &context->pipelineEASU_RCAS)); + wcscpy_s(pipelineDescription.name, L"FSR1-RCAS"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR1, FFX_FSR1_PASS_RCAS, + getPipelinePermutationFlags(contextFlags, FFX_FSR1_PASS_RCAS, supportedFP16, canForceWave64), + &pipelineDescription, context->effectContextId, &context->pipelineRCAS)); + + // For each pipeline: re-route/fix-up IDs based on names + patchResourceBindings(&context->pipelineEASU); + patchResourceBindings(&context->pipelineEASU_RCAS); + patchResourceBindings(&context->pipelineRCAS); + + return FFX_OK; +} + +static void scheduleDispatch(FfxFsr1Context_Private* context, const FfxFsr1DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) +{ + FfxGpuJobDescription dispatchJob = {FFX_GPU_JOB_COMPUTE}; + wcscpy_s(dispatchJob.jobLabel, pipeline->name); + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { + + const uint32_t 
currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = context->srvResources[currentResourceId]; + dispatchJob.computeJobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + wcscpy_s(dispatchJob.computeJobDescriptor.srvTextures[currentShaderResourceViewIndex].name, + pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif + } + + for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) { + + const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier; +#ifdef FFX_DEBUG + wcscpy_s(dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, + pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name); +#endif + const FfxResourceInternal currentResource = context->uavResources[currentResourceId]; + dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0; + } + + dispatchJob.computeJobDescriptor.dimensions[0] = dispatchX; + dispatchJob.computeJobDescriptor.dimensions[1] = dispatchY; + dispatchJob.computeJobDescriptor.dimensions[2] = 1; + dispatchJob.computeJobDescriptor.pipeline = *pipeline; + +#ifdef FFX_DEBUG + wcscpy_s(dispatchJob.computeJobDescriptor.cbNames[0], pipeline->constantBufferBindings[0].name); +#endif + dispatchJob.computeJobDescriptor.cbs[0] = context->constantBuffer; + + + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob); +} + +// Records one frame of FSR1 work into params->commandList. +// Registers params->color (input) and params->output (upscaled output) with the backend, stages the EASU +// constants and schedules the upscale pass over the display size in 16x16 work regions. When +// params->enableSharpening is set and the context was created with FFX_FSR1_ENABLE_RCAS, the EASU_RCAS +// pipeline is used for the upscale and a second RCAS pass is scheduled afterwards. +// Always returns FFX_OK; the results of the backend callbacks are not checked here. +static FfxErrorCode fsr1Dispatch(FfxFsr1Context_Private* context, const FfxFsr1DispatchDescription* params) +{ + // take a short cut to the command list + FfxCommandList commandList = 
params->commandList; + + // Register resources for frame + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->color, context->effectContextId, &context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->output, context->effectContextId, &context->uavResources[FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]); + + // This value is the image region dimension that each thread group of the FSR shader operates on + static const int threadGroupWorkRegionDim = 16; + int dispatchX = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.width, threadGroupWorkRegionDim); + int dispatchY = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.height, threadGroupWorkRegionDim); + + const bool doSharpen = params->enableSharpening && (context->contextDescription.flags & FFX_FSR1_ENABLE_RCAS); + + // Easu constants + Fsr1Constants easuConst = {}; + ffxFsrPopulateEasuConstants(reinterpret_cast<FfxUInt32*>(&easuConst.const0), + reinterpret_cast<FfxUInt32*>(&easuConst.const1), + reinterpret_cast<FfxUInt32*>(&easuConst.const2), + reinterpret_cast<FfxUInt32*>(&easuConst.const3), + static_cast<FfxFloat32>(params->renderSize.width), static_cast<FfxFloat32>(params->renderSize.height), + static_cast<FfxFloat32>(params->color.description.width), static_cast<FfxFloat32>(params->color.description.height), + static_cast<FfxFloat32>(context->contextDescription.displaySize.width), + static_cast<FfxFloat32>(context->contextDescription.displaySize.height)); + easuConst.sample[0] = context->contextDescription.flags & FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE; + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc( + &context->contextDescription.backendInterface, + &easuConst, + sizeof(Fsr1Constants), + &context->constantBuffer); + scheduleDispatch(context, params, doSharpen ? 
&context->pipelineEASU_RCAS : &context->pipelineEASU, dispatchX, dispatchY); + + if (doSharpen) + { + // Rcas constants + Fsr1Constants rcasConst = {}; + // Remap sharpness (0 = no additional sharpness, 1 = maximum) onto the 2.0..0.0 scale handed to FsrRcasCon. + const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f; + FsrRcasCon(reinterpret_cast<FfxUInt32*>(&rcasConst.const0), sharpenessRemapped); + rcasConst.sample[0] = context->contextDescription.flags & FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE; + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc( + &context->contextDescription.backendInterface, + &rcasConst, + sizeof(Fsr1Constants), + &context->constantBuffer); + scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY); + } + + // Execute all the work for the frame + context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId); + + // Release dynamic resources + context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId); + + return FFX_OK; +} + +static FfxErrorCode fsr1Create(FfxFsr1Context_Private* context, const FfxFsr1ContextDescription* contextDescription) +{ + FFX_ASSERT(context); + FFX_ASSERT(contextDescription); + + // Setup the data for implementation. + memset(context, 0, sizeof(FfxFsr1Context_Private)); + context->device = contextDescription->backendInterface.device; + + memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr1ContextDescription)); + + // Check version info - make sure we are linked with the right backend version + FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface); + FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION); + + // Setup constant buffer sizes. + context->constantBuffer.num32BitEntries = sizeof(Fsr1Constants) / sizeof(uint32_t); + + // Create the context. 
+ FfxErrorCode errorCode = + context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FSR1, nullptr, &context->effectContextId); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + // Call out for device caps. + errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + // Create the intermediate upscale resource if RCAS is enabled + const FfxInternalResourceDescription internalSurfaceDesc = {FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, + L"FSR1_InternalUpscaledColor", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + contextDescription->outputFormat, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}; + + // Clear the SRV resources to NULL. 
+ memset(context->srvResources, 0, sizeof(context->srvResources)); + + if (contextDescription->flags & FFX_FSR1_ENABLE_RCAS) + { + const FfxResourceDescription resourceDescription = {FFX_RESOURCE_TYPE_TEXTURE2D, + internalSurfaceDesc.format, + internalSurfaceDesc.width, + internalSurfaceDesc.height, + 1, + internalSurfaceDesc.mipCount, + internalSurfaceDesc.flags, + internalSurfaceDesc.usage}; + + const FfxCreateResourceDescription createResourceDescription = {FFX_HEAP_TYPE_DEFAULT, + resourceDescription, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, + internalSurfaceDesc.name, + internalSurfaceDesc.id, + internalSurfaceDesc.initData}; + + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[internalSurfaceDesc.id])); + } + + // And copy resources to uavResrouces list + memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources)); + + // Create shaders on initialize. 
+ errorCode = createPipelineStates(context); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + +static FfxErrorCode fsr1Release(FfxFsr1Context_Private* context) +{ + FFX_ASSERT(context); + + // Release all pipelines + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineEASU, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineEASU_RCAS, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineRCAS, context->effectContextId); + + // Unregister resources not created internally + context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR] = { FFX_FSR1_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR1_RESOURCE_IDENTIFIER_NULL }; + + // Release internal resource + ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR], context->effectContextId); + + // Destroy the context + context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId); + + return FFX_OK; +} + +// Public entry point: validates its arguments and the backend interface, then hands over to fsr1Create. +// Returns FFX_ERROR_INVALID_POINTER when context or contextDescription is null and +// FFX_ERROR_INCOMPLETE_INTERFACE when one of the checked backend callbacks (or the size of a declared +// scratch buffer) is missing; otherwise returns the result of fsr1Create. +FfxErrorCode ffxFsr1ContextCreate(FfxFsr1Context* context, const FfxFsr1ContextDescription* contextDescription) +{ + // Check the context pointer first: it has to be valid before the memory behind it is zeroed. 
+ FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + + // Zero context memory + memset(context, 0, sizeof(FfxFsr1Context)); + + FFX_RETURN_ON_ERROR( + contextDescription, + FFX_ERROR_INVALID_POINTER); + + // Validate that all callbacks are set for the interface + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + + // If a scratch buffer is declared, then we must have a size + if (contextDescription->backendInterface.scratchBuffer) { + + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + } + + // Ensure the context is large enough for the internal context. + FFX_STATIC_ASSERT(sizeof(FfxFsr1Context) >= sizeof(FfxFsr1Context_Private)); + + // create the context. 
+ FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context); + const FfxErrorCode errorCode = fsr1Create(contextPrivate, contextDescription); + + return errorCode; +} + +FFX_API FfxErrorCode ffxFsr1ContextGetGpuMemoryUsage(FfxFsr1Context* context, FfxEffectMemoryUsage* vramUsage) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER); + FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context); + + FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); + + FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage( + &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + +FfxErrorCode ffxFsr1ContextDestroy(FfxFsr1Context* context) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + + // Destroy the context. + FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context); + const FfxErrorCode errorCode = fsr1Release(contextPrivate); + return errorCode; +} + +FfxErrorCode ffxFsr1ContextDispatch(FfxFsr1Context* context, const FfxFsr1DispatchDescription* dispatchDescription) +{ + // check pointers are valid + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(dispatchDescription, FFX_ERROR_INVALID_POINTER); + + FfxFsr1Context_Private* contextPrivate = (FfxFsr1Context_Private*)(context); + + // validate that renderSize is within the maximum. 
+ FFX_RETURN_ON_ERROR( + dispatchDescription->renderSize.width <= contextPrivate->contextDescription.maxRenderSize.width, + FFX_ERROR_OUT_OF_RANGE); + FFX_RETURN_ON_ERROR( + dispatchDescription->renderSize.height <= contextPrivate->contextDescription.maxRenderSize.height, + FFX_ERROR_OUT_OF_RANGE); + FFX_RETURN_ON_ERROR( + contextPrivate->device, + FFX_ERROR_NULL_DEVICE); + + // dispatch the FSR2 passes. + const FfxErrorCode errorCode = fsr1Dispatch(contextPrivate, dispatchDescription); + return errorCode; +} + +float ffxFsr1GetUpscaleRatioFromQualityMode(FfxFsr1QualityMode qualityMode) +{ + switch (qualityMode) { + case FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY: + return 1.3f; + case FFX_FSR1_QUALITY_MODE_QUALITY: + return 1.5f; + case FFX_FSR1_QUALITY_MODE_BALANCED: + return 1.7f; + case FFX_FSR1_QUALITY_MODE_PERFORMANCE: + return 2.0f; + default: + return 0.0f; + } +} + +FfxErrorCode ffxFsr1GetRenderResolutionFromQualityMode( + uint32_t* renderWidth, + uint32_t* renderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + FfxFsr1QualityMode qualityMode) +{ + FFX_RETURN_ON_ERROR(renderWidth, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(renderHeight, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY <= qualityMode && qualityMode <= FFX_FSR1_QUALITY_MODE_PERFORMANCE, + FFX_ERROR_INVALID_ENUM); + + // scale by the predefined ratios in each dimension. 
+ const float ratio = ffxFsr1GetUpscaleRatioFromQualityMode(qualityMode); + const uint32_t scaledDisplayWidth = (uint32_t)((float)displayWidth / ratio); + const uint32_t scaledDisplayHeight = (uint32_t)((float)displayHeight / ratio); + *renderWidth = scaledDisplayWidth; + *renderHeight = scaledDisplayHeight; + + return FFX_OK; +} + +FFX_API FfxVersionNumber ffxFsr1GetEffectVersion() +{ + return FFX_SDK_MAKE_VERSION(FFX_FSR1_VERSION_MAJOR, FFX_FSR1_VERSION_MINOR, FFX_FSR1_VERSION_PATCH); +} diff --git a/thirdparty/amd-ffx/ffx_fsr1.h b/thirdparty/amd-ffx/ffx_fsr1.h new file mode 100644 index 000000000000..5201884ac9b7 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr1.h @@ -0,0 +1,302 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +/// @defgroup ffxFsr1 FidelityFX FSR1 +/// FidelityFX Super Resolution 1 runtime library +/// +/// @ingroup SDKComponents + +#pragma once + +/// Include the interface for the backend of the FSR 1.0 API. +/// +/// @ingroup ffxFsr1 +#include "ffx_interface.h" + +/// FidelityFX Super Resolution 1.0 major version. +/// +/// @ingroup ffxFsr1 +#define FFX_FSR1_VERSION_MAJOR (1) + +/// FidelityFX Super Resolution 1.0 minor version. +/// +/// @ingroup ffxFsr1 +#define FFX_FSR1_VERSION_MINOR (2) + +/// FidelityFX Super Resolution 1.0 patch version. +/// +/// @ingroup ffxFsr1 +#define FFX_FSR1_VERSION_PATCH (0) + +/// FidelityFX Super Resolution 1.0 context count +/// +/// Defines the number of internal effect contexts required by FSR1 +/// +/// @ingroup ffxFsr1 +#define FFX_FSR1_CONTEXT_COUNT 2 + +/// The size of the context specified in 32bit values. +/// +/// @ingroup ffxFsr1 + +// GODOT BEGINS +// On non-Windows platforms `wchar_t` is 32 bits rather than 16 bits, +// so we have to increase the size of the context by 2x. +#define FFX_FSR1_CONTEXT_SIZE (52408) +// GODOT ENDS + +#if defined(__cplusplus) +extern "C" { +#endif // #if defined(__cplusplus) + +/// An enumeration of all the passes which constitute the FSR1 algorithm. +/// +/// FSR1 is implemented as a composite of several compute passes each +/// computing a key part of the final result. Each call to the +/// FfxFsr1ScheduleGpuJobFunc callback function will +/// correspond to a single pass included in FfxFsr1Pass. For a +/// more comprehensive description of each pass, please refer to the FSR1 +/// reference documentation. +/// +/// @ingroup ffxFsr1 +typedef enum FfxFsr1Pass +{ + FFX_FSR1_PASS_EASU = 0, ///< A pass which upscales the color buffer using easu. + FFX_FSR1_PASS_EASU_RCAS = 1, ///< A pass which upscales the color buffer in preparation for rcas + FFX_FSR1_PASS_RCAS = 2, ///< A pass which performs rcas sharpening on the upscaled image. 
+ + FFX_FSR1_PASS_COUNT ///< The number of passes performed by FSR1. +} FfxFsr1Pass; + +/// An enumeration of all the quality modes supported by FidelityFX Super +/// Resolution 1 upscaling. +/// +/// In order to provide a consistent user experience across multiple +/// applications which implement FSR1, it is strongly recommended that the +/// following preset scaling factors are made available through your +/// application's user interface. +/// +/// If your application does not expose the notion of preset scaling factors +/// for upscaling algorithms (perhaps instead implementing a fixed ratio which +/// is immutable) or implementing a more dynamic scaling scheme (such as +/// dynamic resolution scaling), then there is no need to use these presets. +/// +/// @ingroup ffxFsr1 +typedef enum FfxFsr1QualityMode { + + FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY = 0, ///< Perform upscaling with a per-dimension upscaling ratio of 1.3x. + FFX_FSR1_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x. + FFX_FSR1_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x. + FFX_FSR1_QUALITY_MODE_PERFORMANCE = 3 ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x. +} FfxFsr1QualityMode; + +/// An enumeration of bit flags used when creating a +/// FfxFsr1Context. See FfxFsr1ContextDescription. +/// +/// @ingroup ffxFsr1 +typedef enum FfxFsr1InitializationFlagBits { + + FFX_FSR1_ENABLE_RCAS = (1 << 0), ///< A bit indicating if we should use rcas. + FFX_FSR1_RCAS_PASSTHROUGH_ALPHA = (1 << 1), ///< A bit indicating if we should use passthrough alpha during rcas. + FFX_FSR1_RCAS_DENOISE = (1 << 2), ///< A bit indicating if denoising is invoked during rcas. + FFX_FSR1_ENABLE_HIGH_DYNAMIC_RANGE = (1 << 3), ///< A bit indicating if the input color data provided is using a high-dynamic range. 
+ FFX_FSR1_ENABLE_SRGB_CONVERSIONS = (1 << 4), ///< A bit indicating that input/output resources require gamma conversions + +} FfxFsr1InitializationFlagBits; + +/// A structure encapsulating the parameters required to initialize FidelityFX +/// Super Resolution 1.0 +/// +/// @ingroup ffxFsr1 +typedef struct FfxFsr1ContextDescription { + + uint32_t flags; ///< A collection of FfxFsr1InitializationFlagBits. + FfxSurfaceFormat outputFormat; ///< Format of the output target used for creation of the internal upscale resource + FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. + FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the upscaling process. + FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FSR1. +} FfxFsr1ContextDescription; + +/// A structure encapsulating the parameters for dispatching the various passes +/// of FidelityFX Super Resolution 1.0 +/// +/// @ingroup ffxFsr1 +typedef struct FfxFsr1DispatchDescription { + + FfxCommandList commandList; ///< The FfxCommandList to record FSR1 rendering commands into. + FfxResource color; ///< A FfxResource containing the color buffer for the current frame (at render resolution). + FfxResource output; ///< A FfxResource containing the output color buffer for the current frame (at presentation resolution). + FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resource. + bool enableSharpening; ///< Enable an additional sharpening pass. + float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness. +} FfxFsr1DispatchDescription; + +/// A structure encapsulating the FidelityFX Super Resolution 1.0 context. +/// +/// This sets up an object which contains all persistent internal data and +/// resources that are required by FSR1. 
+/// +/// The FfxFsr1Context object should have a lifetime matching +/// your use of FSR1. Before destroying the FSR1 context care should be taken +/// to ensure the GPU is not accessing the resources created or used by FSR1. +/// It is therefore recommended that the GPU is idle before destroying the +/// FSR1 context. +/// +/// @ingroup ffxFsr1 +typedef struct FfxFsr1Context { + + uint32_t data[FFX_FSR1_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context. +} FfxFsr1Context; + + +/// Create a FidelityFX Super Resolution 1.0 context from the parameters +/// programmed to the FfxFsr1ContextDescription structure. +/// +/// The context structure is the main object used to interact with the Super +/// Resolution 1.0 API, and is responsible for the management of the internal resources +/// used by the FSR1 algorithm. When this API is called, multiple calls +/// will be made via the pointers contained in the backendInterface +/// structure. These callbacks will attempt to retrieve the device capabilities, +/// and create the internal resources, and pipelines required by FSR1's +/// frame-to-frame function. Depending on the precise configuration used when +/// creating the FfxFsr1Context a different set of resources and +/// pipelines might be requested via the callback functions. +/// +/// The FfxFsr1Context should be destroyed when use of it is +/// completed, typically when an application is unloaded or FSR1 +/// upscaling is disabled by a user. To destroy the FSR1 context you +/// should call ffxFsr1ContextDestroy. +/// +/// @param [out] pContext A pointer to a FfxFsr1Context structure to populate. +/// @param [in] pContextDescription A pointer to a FfxFsr1ContextDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER The operation failed because either context or contextDescription was NULL. 
+/// @retval +/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr1ContextDescription.backendInterface was not fully specified. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup ffxFsr1 +FFX_API FfxErrorCode ffxFsr1ContextCreate(FfxFsr1Context* pContext, const FfxFsr1ContextDescription* pContextDescription); + +/// Get GPU memory usage of the FidelityFX Super Resolution context. +/// +/// @param [in] pContext A pointer to a FfxFsr1Context structure. +/// @param [out] pVramUsage A pointer to a FfxEffectMemoryUsage structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER The operation failed because either context or vramUsage was NULL. +/// +/// @ingroup ffxFsr1 +FFX_API FfxErrorCode ffxFsr1ContextGetGpuMemoryUsage(FfxFsr1Context* pContext, FfxEffectMemoryUsage* pVramUsage); + +/// @param [in,out] pContext A pointer to the FfxFsr1Context structure to dispatch with. +/// @param [in] pDispatchDescription A pointer to a FfxFsr1DispatchDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER The operation failed because either context or dispatchDescription was NULL. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup ffxFsr1 +FFX_API FfxErrorCode ffxFsr1ContextDispatch(FfxFsr1Context* pContext, const FfxFsr1DispatchDescription* pDispatchDescription); + +/// Destroy the FidelityFX FSR 1 context. +/// +/// @param [out] pContext A pointer to a FfxFsr1Context structure to destroy. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER The operation failed because context was NULL. 
+/// +/// @ingroup ffxFsr1 +FFX_API FfxErrorCode ffxFsr1ContextDestroy(FfxFsr1Context* pContext); + +/// Get the upscale ratio from the quality mode. +/// +/// The following table enumerates the mapping of the quality modes to +/// per-dimension scaling ratios. +/// +/// Quality preset | Scale factor +/// ----------------------------------------------------- | ------------- +/// FFX_FSR1_QUALITY_MODE_ULTRA_QUALITY | 1.3x +/// FFX_FSR1_QUALITY_MODE_QUALITY | 1.5x +/// FFX_FSR1_QUALITY_MODE_BALANCED | 1.7x +/// FFX_FSR1_QUALITY_MODE_PERFORMANCE | 2.0x +/// +/// Passing an invalid qualityMode will return 0.0f. +/// +/// @param [in] qualityMode The quality mode preset. +/// +/// @returns +/// The upscaling the per-dimension upscaling ratio for +/// qualityMode according to the table above. +/// +/// @ingroup ffxFsr1 +FFX_API float ffxFsr1GetUpscaleRatioFromQualityMode(FfxFsr1QualityMode qualityMode); + +/// A helper function to calculate the rendering resolution from a target +/// resolution and desired quality level. +/// +/// This function applies the scaling factor returned by +/// ffxFsr1GetUpscaleRatioFromQualityMode to each dimension. +/// +/// @param [out] pRenderWidth A pointer to a uint32_t which will hold the calculated render resolution width. +/// @param [out] pRenderHeight A pointer to a uint32_t which will hold the calculated render resolution height. +/// @param [in] displayWidth The target display resolution width. +/// @param [in] displayHeight The target display resolution height. +/// @param [in] qualityMode The desired quality mode for FSR1 upscaling. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER Either renderWidth or renderHeight was NULL. +/// @retval +/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified. 
+/// +/// @ingroup ffxFsr1 +FFX_API FfxErrorCode ffxFsr1GetRenderResolutionFromQualityMode( + uint32_t* pRenderWidth, + uint32_t* pRenderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + FfxFsr1QualityMode qualityMode); + +/// Queries the effect version number. +/// +/// @returns +/// The SDK version the effect was built with. +/// +/// @ingroup ffxFsr1 +FFX_API FfxVersionNumber ffxFsr1GetEffectVersion(); + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-ffx/ffx_fsr1_private.h b/thirdparty/amd-ffx/ffx_fsr1_private.h new file mode 100644 index 000000000000..1c9b23ed4fd7 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr1_private.h @@ -0,0 +1,75 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once +#include "gpu/fsr1/ffx_fsr1_resources.h" + +/// An enumeration of all the permutations that can be passed to the FSR1 algorithm. +/// +/// FSR1 features are organized through a set of pre-defined compile +/// permutation options that need to be specified. Which shader blob +/// is returned for pipeline creation will be determined by what combination +/// of shader permutations are enabled. +/// +typedef enum Fs1ShaderPermutationOptions +{ + FSR1_SHADER_PERMUTATION_APPLY_RCAS = (1 << 0), ///< RCAS will be applied, outputs to correct intermediary target + FSR1_SHADER_PERMUTATION_RCAS_PASSTHROUGH_ALPHA = (1 << 1), ///< Compile RCAS to pass through the input alpha value + FSR1_SHADER_PERMUTATION_SRGB_CONVERSIONS = (1 << 2), ///< Handle necessary conversions for SRGB formats (de-gamma in and gamma out) + FSR1_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 3), ///< doesn't map to a define, selects different table + FSR1_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 4), ///< Enables fast math computations where possible +} Fs1ShaderPermutationOptions; + +// Constants for FSR1 dispatches. Must be kept in sync with cbFSR1 in ffx_fsr1_callbacks_hlsl.h +typedef struct Fsr1Constants +{ + FfxUInt32x4 const0; + FfxUInt32x4 const1; + FfxUInt32x4 const2; + FfxUInt32x4 const3; + FfxUInt32x4 sample; +} Fsr1Constants; + +struct FfxFsr1ContextDescription; +struct FfxDeviceCapabilities; +struct FfxPipelineState; +struct FfxResource; + +// FfxFsr1Context_Private +// The private implementation of the FSR1 context. 
+typedef struct FfxFsr1Context_Private { + + FfxFsr1ContextDescription contextDescription; + FfxUInt32 effectContextId; + Fsr1Constants constants; + FfxDevice device; + FfxDeviceCapabilities deviceCapabilities; + FfxConstantBuffer constantBuffer; + + FfxPipelineState pipelineEASU; + FfxPipelineState pipelineEASU_RCAS; + FfxPipelineState pipelineRCAS; + + FfxResourceInternal srvResources[FFX_FSR1_RESOURCE_IDENTIFIER_COUNT]; + FfxResourceInternal uavResources[FFX_FSR1_RESOURCE_IDENTIFIER_COUNT]; + +} FfxFsr1Context_Private; diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-ffx/ffx_fsr2.cpp similarity index 50% rename from thirdparty/amd-fsr2/ffx_fsr2.cpp rename to thirdparty/amd-ffx/ffx_fsr2.cpp index ec571b9cd27d..bae2a6b1e232 100644 --- a/thirdparty/amd-fsr2/ffx_fsr2.cpp +++ b/thirdparty/amd-ffx/ffx_fsr2.cpp @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -23,27 +24,26 @@ #include // for fabs, abs, sinf, sqrt, etc. 
#include // for memset #include // for FLT_EPSILON -#include "ffx_fsr2.h" -#define FFX_CPU -#include "shaders/ffx_core.h" -#include "shaders/ffx_fsr1.h" -#include "shaders/ffx_spd.h" -#include "shaders/ffx_fsr2_callbacks_hlsl.h" - -#include "ffx_fsr2_maximum_bias.h" #ifdef __clang__ -#pragma clang diagnostic ignored "-Wunused-variable" +#pragma clang diagnostic ignored "-Wsign-compare" +#pragma clang diagnostic ignored "-Wunused-function" #endif -#ifndef _countof -#define _countof(array) (sizeof(array) / sizeof(array[0])) +#ifdef _MSC_VER +#pragma warning(disable : 4505) #endif -#ifndef _MSC_VER -#include -#define wcscpy_s wcscpy -#endif +#include "ffx_fsr2.h" +#define FFX_CPU +#include "gpu/ffx_core.h" +#include "gpu/fsr1/ffx_fsr1.h" +#include "gpu/spd/ffx_spd.h" +#include "gpu/fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "gpu/fsr2/ffx_fsr2_common.h" +#include "ffx_object_management.h" + +#include "ffx_fsr2_maximum_bias.h" // max queued frames for descriptor management static const uint32_t FSR2_MAX_QUEUED_FRAMES = 16; @@ -57,7 +57,7 @@ typedef struct ResourceBinding wchar_t name[64]; }ResourceBinding; -static const ResourceBinding srvResourceBindingTable[] = +static const ResourceBinding srvTextureBindingTable[] = { {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color_jittered"}, {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY, L"r_input_opaque_only"}, @@ -88,7 +88,7 @@ static const ResourceBinding srvResourceBindingTable[] = {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR, L"r_input_prev_color_post_alpha"}, }; -static const ResourceBinding uavResourceBindingTable[] = +static const ResourceBinding uavTextureBindingTable[] = { {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"rw_reconstructed_previous_nearest_depth"}, {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"rw_dilated_motion_vectors"}, @@ -111,7 +111,7 @@ static const ResourceBinding uavResourceBindingTable[] = {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR, 
L"rw_output_prev_color_post_alpha"}, }; -static const ResourceBinding cbResourceBindingTable[] = +static const ResourceBinding constantBufferBindingTable[] = { {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2, L"cbFSR2"}, {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbSPD"}, @@ -120,14 +120,14 @@ static const ResourceBinding cbResourceBindingTable[] = }; // Broad structure of the root signature. -typedef enum Fsr2RootSignatureLayout { +/*typedef enum Fsr2RootSignatureLayout { FSR2_ROOT_SIGNATURE_LAYOUT_UAVS, FSR2_ROOT_SIGNATURE_LAYOUT_SRVS, FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS, FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1, FSR2_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT -} Fsr2RootSignatureLayout; +} Fsr2RootSignatureLayout;*/ typedef struct Fsr2RcasConstants { @@ -167,27 +167,6 @@ typedef union Fsr2SecondaryUnion { Fsr2GenerateReactiveConstants2 autogenReactive; } Fsr2SecondaryUnion; -typedef struct Fsr2ResourceDescription { - - uint32_t id; - const wchar_t* name; - FfxResourceUsage usage; - FfxSurfaceFormat format; - uint32_t width; - uint32_t height; - uint32_t mipCount; - FfxResourceFlags flags; - uint32_t initDataSize; - void* initData; -} Fsr2ResourceDescription; - -FfxConstantBuffer globalFsr2ConstantBuffers[4] = { - { sizeof(Fsr2Constants) / sizeof(uint32_t) }, - { sizeof(Fsr2SpdConstants) / sizeof(uint32_t) }, - { sizeof(Fsr2RcasConstants) / sizeof(uint32_t) }, - { sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t) } -}; - // Lanczos static float lanczos2(float value) { @@ -213,77 +192,77 @@ static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr { if (params->commandList == nullptr) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"commandList is null"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"commandList is null"); } if (params->color.resource == nullptr) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"color resource is null"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"color 
resource is null"); } if (params->depth.resource == nullptr) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"depth resource is null"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"depth resource is null"); } if (params->motionVectors.resource == nullptr) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"motionVectors resource is null"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"motionVectors resource is null"); } if (params->exposure.resource != nullptr) { if ((context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) == FFX_FSR2_ENABLE_AUTO_EXPOSURE) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present"); } } if (params->output.resource == nullptr) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"output resource is null"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"output resource is null"); } if (fabs(params->jitterOffset.x) > 1.0f || fabs(params->jitterOffset.y) > 1.0f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]"); } if ((params->motionVectorScale.x > (float)context->contextDescription.maxRenderSize.width) || (params->motionVectorScale.y > (float)context->contextDescription.maxRenderSize.height)) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize"); } if ((params->motionVectorScale.x == 0.0f) || (params->motionVectorScale.y == 0.0f)) { - 
context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value"); } if ((params->renderSize.width > context->contextDescription.maxRenderSize.width) || (params->renderSize.height > context->contextDescription.maxRenderSize.height)) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize"); } if ((params->renderSize.width == 0) || (params->renderSize.height == 0)) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension"); } if (params->sharpness < 0.0f || params->sharpness > 1.0f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]"); } if (params->frameTimeDelta < 1.0f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)"); } if (params->preExposure == 0.0f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid"); } bool infiniteDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE; @@ -293,20 +272,20 @@ static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* 
context, const FfxFsr { if (params->cameraNear < params->cameraFar) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"FFX_FSR2_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than cameraFar"); } if (infiniteDepth) { if (params->cameraNear != FLT_MAX) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX"); } } if (params->cameraFar < 0.075f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting"); } } @@ -314,117 +293,184 @@ static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr { if (params->cameraNear > params->cameraFar) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"cameraNear is greater than cameraFar in non-inverted-depth context"); } if (infiniteDepth) { if (params->cameraFar != FLT_MAX) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraFar != FLT_MAX"); } } if (params->cameraNear < 0.075f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraNear value is very low which may result in depth separation artefacting"); } } if (params->cameraFovAngleVertical <= 0.0f) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f"); + 
FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f"); } if (params->cameraFovAngleVertical > FFX_PI) { - context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI"); + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI"); } } static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) { - for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvCount; ++srvIndex) + for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex) { int32_t mapIndex = 0; - for (mapIndex = 0; mapIndex < _countof(srvResourceBindingTable); ++mapIndex) + for (mapIndex = 0; mapIndex < _countof(srvTextureBindingTable); ++mapIndex) { - if (0 == wcscmp(srvResourceBindingTable[mapIndex].name, inoutPipeline->srvResourceBindings[srvIndex].name)) + if (0 == wcscmp(srvTextureBindingTable[mapIndex].name, inoutPipeline->srvTextureBindings[srvIndex].name)) break; } - if (mapIndex == _countof(srvResourceBindingTable)) + if (mapIndex == _countof(srvTextureBindingTable)) return FFX_ERROR_INVALID_ARGUMENT; - inoutPipeline->srvResourceBindings[srvIndex].resourceIdentifier = srvResourceBindingTable[mapIndex].index; + inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = srvTextureBindingTable[mapIndex].index; } - for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavCount; ++uavIndex) + for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex) { int32_t mapIndex = 0; - for (mapIndex = 0; mapIndex < _countof(uavResourceBindingTable); ++mapIndex) + for (mapIndex = 0; mapIndex < _countof(uavTextureBindingTable); ++mapIndex) { - if (0 == wcscmp(uavResourceBindingTable[mapIndex].name, inoutPipeline->uavResourceBindings[uavIndex].name)) + if (0 == wcscmp(uavTextureBindingTable[mapIndex].name, inoutPipeline->uavTextureBindings[uavIndex].name)) break; } - if (mapIndex == 
_countof(uavResourceBindingTable)) + if (mapIndex == _countof(uavTextureBindingTable)) return FFX_ERROR_INVALID_ARGUMENT; - inoutPipeline->uavResourceBindings[uavIndex].resourceIdentifier = uavResourceBindingTable[mapIndex].index; + inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = uavTextureBindingTable[mapIndex].index; } for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex) { int32_t mapIndex = 0; - for (mapIndex = 0; mapIndex < _countof(cbResourceBindingTable); ++mapIndex) + for (mapIndex = 0; mapIndex < _countof(constantBufferBindingTable); ++mapIndex) { - if (0 == wcscmp(cbResourceBindingTable[mapIndex].name, inoutPipeline->cbResourceBindings[cbIndex].name)) + if (0 == wcscmp(constantBufferBindingTable[mapIndex].name, inoutPipeline->constantBufferBindings[cbIndex].name)) break; } - if (mapIndex == _countof(cbResourceBindingTable)) + if (mapIndex == _countof(constantBufferBindingTable)) return FFX_ERROR_INVALID_ARGUMENT; - inoutPipeline->cbResourceBindings[cbIndex].resourceIdentifier = cbResourceBindingTable[mapIndex].index; + inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = constantBufferBindingTable[mapIndex].index; } return FFX_OK; } +static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxFsr2Pass passId, bool fp16, bool force64, bool useLut) +{ + // work out what permutation to load. + uint32_t flags = 0; + flags |= (contextFlags & FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE) ? FSR2_SHADER_PERMUTATION_HDR_COLOR_INPUT : 0; + flags |= (contextFlags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? 0 : FSR2_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS; + flags |= (contextFlags & FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) ? FSR2_SHADER_PERMUTATION_JITTER_MOTION_VECTORS : 0; + flags |= (contextFlags & FFX_FSR2_ENABLE_DEPTH_INVERTED) ? FSR2_SHADER_PERMUTATION_DEPTH_INVERTED : 0; + flags |= (passId == FFX_FSR2_PASS_ACCUMULATE_SHARPEN) ? 
FSR2_SHADER_PERMUTATION_ENABLE_SHARPENING : 0; + flags |= (useLut) ? FSR2_SHADER_PERMUTATION_USE_LANCZOS_TYPE : 0; + flags |= (force64) ? FSR2_SHADER_PERMUTATION_FORCE_WAVE64 : 0; +#if defined(_GAMING_XBOX) + /** On Xbox we enable 16-bit math, and use 32-bit within the shader only where it's necessary. */ + flags |= (fp16) ? FSR2_SHADER_PERMUTATION_ALLOW_FP16 : 0; +#else + flags |= (fp16 && (passId != FFX_FSR2_PASS_RCAS)) ? FSR2_SHADER_PERMUTATION_ALLOW_FP16 : 0; +#endif // defined(_GAMING_XBOX) + return flags; +} static FfxErrorCode createPipelineStates(FfxFsr2Context_Private* context) { FFX_ASSERT(context); - const size_t samplerCount = 2; - FfxFilterType samplers[samplerCount]; - samplers[0] = FFX_FILTER_TYPE_POINT; - samplers[1] = FFX_FILTER_TYPE_LINEAR; - - const size_t rootConstantCount = 2; - uint32_t rootConstants[rootConstantCount]; - rootConstants[0] = sizeof(Fsr2Constants) / sizeof(uint32_t); - rootConstants[1] = sizeof(Fsr2SecondaryUnion) / sizeof(uint32_t); - - FfxPipelineDescription pipelineDescription; + FfxPipelineDescription pipelineDescription = {}; pipelineDescription.contextFlags = context->contextDescription.flags; - pipelineDescription.samplerCount = samplerCount; - pipelineDescription.samplers = samplers; - pipelineDescription.rootConstantBufferCount = rootConstantCount; - pipelineDescription.rootConstantBufferSizes = rootConstants; - - // New interface: will handle RootSignature in backend - // set up pipeline descriptor (basically RootSignature and binding) - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, &pipelineDescription, &context->pipelineComputeLuminancePyramid)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RCAS, &pipelineDescription, &context->pipelineRCAS)); - 
FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_GENERATE_REACTIVE, &pipelineDescription, &context->pipelineGenerateReactive)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_TCR_AUTOGENERATE, &pipelineDescription, &context->pipelineTcrAutogenerate)); + + // Samplers + pipelineDescription.samplerCount = 2; + FfxSamplerDescription samplerDescs[2] = { { FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE }, + { FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE} }; + pipelineDescription.samplers = samplerDescs; + + // Root constants + pipelineDescription.rootConstantBufferCount = 2; + FfxRootConstantDescription rootConstantDescs[2] = { {sizeof(Fsr2Constants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE }, + { sizeof(Fsr2SecondaryUnion) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE } }; + pipelineDescription.rootConstants = rootConstantDescs; + + // Query device capabilities + FfxDeviceCapabilities capabilities; + context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities); + + // Setup a few options used to determine permutation flags + bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6; + bool supportedFP16 = capabilities.fp16Supported; + bool canForceWave64 = false; + bool useLut = false; + + const uint32_t waveLaneCountMin = capabilities.waveLaneCountMin; + const uint32_t waveLaneCountMax = capabilities.waveLaneCountMax; + if (waveLaneCountMin <= 64 && waveLaneCountMax >= 64) + { + useLut = true; + canForceWave64 = haveShaderModel66; + } + else + canForceWave64 = false; + + // Work out what permutation to load. 
+ uint32_t contextFlags = context->contextDescription.flags; + + // Set up pipeline descriptor (basically RootSignature and binding) + wcscpy_s(pipelineDescription.name, L"FSR2-LUM_PYRAMID"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineComputeLuminancePyramid)); + wcscpy_s(pipelineDescription.name, L"FSR2-RCAS"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_RCAS, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_RCAS, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineRCAS)); + wcscpy_s(pipelineDescription.name, L"FSR2-GEN_REACTIVE"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_GENERATE_REACTIVE, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_GENERATE_REACTIVE, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineGenerateReactive)); + wcscpy_s(pipelineDescription.name, L"FSR2-TCR_AUTOGENERATE"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_TCR_AUTOGENERATE, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_TCR_AUTOGENERATE, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineTcrAutogenerate)); pipelineDescription.rootConstantBufferCount = 1; - 
FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_DEPTH_CLIP, &pipelineDescription, &context->pipelineDepthClip)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, &pipelineDescription, &context->pipelineReconstructPreviousDepth)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_LOCK, &pipelineDescription, &context->pipelineLock)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE, &pipelineDescription, &context->pipelineAccumulate)); - FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, &pipelineDescription, &context->pipelineAccumulateSharpen)); - + + wcscpy_s(pipelineDescription.name, L"FSR2-DEPTH_CLIP"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_DEPTH_CLIP, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_DEPTH_CLIP, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineDepthClip)); + wcscpy_s(pipelineDescription.name, L"FSR2-RECON_PREV_DEPTH"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineReconstructPreviousDepth)); + wcscpy_s(pipelineDescription.name, L"FSR2-LOCK"); + 
FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_LOCK, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_LOCK, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineLock)); + wcscpy_s(pipelineDescription.name, L"FSR2-ACCUMULATE"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_ACCUMULATE, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_ACCUMULATE, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineAccumulate)); + wcscpy_s(pipelineDescription.name, L"FSR2-ACCUM_SHARP"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, + getPipelinePermutationFlags(contextFlags, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineAccumulateSharpen)); + // for each pipeline: re-route/fix-up IDs based on names patchResourceBindings(&context->pipelineDepthClip); patchResourceBindings(&context->pipelineReconstructPreviousDepth); @@ -448,26 +494,27 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con // Setup the data for implementation. 
memset(context, 0, sizeof(FfxFsr2Context_Private)); - context->device = contextDescription->device; + context->device = contextDescription->backendInterface.device; memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr2ContextDescription)); - if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING) - { - if (context->contextDescription.fpMessage == nullptr) - { - FFX_ASSERT(context->contextDescription.fpMessage != nullptr); - // remove the debug checking flag - we have no message function - context->contextDescription.flags &= ~FFX_FSR2_ENABLE_DEBUG_CHECKING; - } - } + // Check version info - make sure we are linked with the right backend version + FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface); + FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION); + + // Setup constant buffer sizes. + context->constantBuffers[0].num32BitEntries = sizeof(Fsr2Constants) / sizeof(uint32_t); + context->constantBuffers[1].num32BitEntries = sizeof(Fsr2SpdConstants) / sizeof(uint32_t); + context->constantBuffers[2].num32BitEntries = sizeof(Fsr2RcasConstants) / sizeof(uint32_t); + context->constantBuffers[3].num32BitEntries = sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t); - // Create the device. - FfxErrorCode errorCode = context->contextDescription.callbacks.fpCreateBackendContext(&context->contextDescription.callbacks, context->device); + // Create the context. + FfxErrorCode errorCode = + context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FSR2, nullptr, &context->effectContextId); FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); // call out for device caps. 
- errorCode = context->contextDescription.callbacks.fpGetDeviceCapabilities(&context->contextDescription.callbacks, &context->deviceCapabilities, context->device); + errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities); FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); // set defaults @@ -495,92 +542,307 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con maximumBias[i] = int16_t(roundf(ffxFsr2MaximumBias[i] / 2.0f * 32767.0f)); } - uint8_t defaultReactiveMaskData = 0U; - uint32_t atomicInitData = 0U; - float defaultExposure[] = { 0.0f, 0.0f }; - const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_FSR2_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D; - // declare internal resources needed - const Fsr2ResourceDescription internalSurfaceDesc[] = { - - { FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1, L"FSR2_InternalDilatedVelocity1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2, L"FSR2_InternalDilatedVelocity2", 
(FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"FSR2_DilatedDepth", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R32_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1, L"FSR2_LockStatus1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2, L"FSR2_LockStatus2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"FSR2_LockInputLuma", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR2_NewLocks", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR2_InternalUpscaled1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { 
FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR2_InternalUpscaled2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, L"FSR2_ExposureMips", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR2_LumaHistory1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR2_LumaHistory2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR2_SpdAtomicCounter", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), - FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_ALIASABLE, sizeof(atomicInitData), &atomicInitData }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR2_DilatedReactiveMasks", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R8G8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"FSR2_LanczosLutData", FFX_RESOURCE_USAGE_READ_ONLY, - FFX_SURFACE_FORMAT_R16_SNORM, lanczos2LutWidth, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(lanczos2Weights), lanczos2Weights }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, L"FSR2_DefaultReactiviyMask", 
FFX_RESOURCE_USAGE_READ_ONLY, - FFX_SURFACE_FORMAT_R8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultReactiveMaskData), &defaultReactiveMaskData }, - - { FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, L"FSR2_MaximumUpsampleBias", FFX_RESOURCE_USAGE_READ_ONLY, - FFX_SURFACE_FORMAT_R16_SNORM, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(maximumBias), maximumBias }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR2_DefaultExposure", FFX_RESOURCE_USAGE_READ_ONLY, - FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultExposure), defaultExposure }, - - { FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"FSR2_AutoExposure", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE }, - - - // only one for now, will need pingpont to respect the motion vectors - { FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"FSR2_AutoReactive", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, L"FSR2_AutoComposition", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1, L"FSR2_PrevPreAlpha0", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1, L"FSR2_PrevPostAlpha0", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2, L"FSR2_PrevPreAlpha1", 
FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - { FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2, L"FSR2_PrevPostAlpha1", FFX_RESOURCE_USAGE_UAV, - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE }, - + const FfxInternalResourceDescription internalSurfaceDesc[] = { + + {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, + L"FSR2_PreparedInputColor", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, + L"FSR2_ReconstructedPrevNearestDepth", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1, + L"FSR2_InternalDilatedVelocity1", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16G16_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2, + L"FSR2_InternalDilatedVelocity2", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + 
FFX_SURFACE_FORMAT_R16G16_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, + L"FSR2_DilatedDepth", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R32_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1, + L"FSR2_LockStatus1", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16G16_FLOAT, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2, + L"FSR2_LockStatus2", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16G16_FLOAT, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, + L"FSR2_LockInputLuma", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, + L"FSR2_NewLocks", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8_UNORM, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, 
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, + L"FSR2_InternalUpscaled1", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, + L"FSR2_InternalUpscaled2", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, + L"FSR2_ExposureMips", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16_FLOAT, + contextDescription->maxRenderSize.width / 2, + contextDescription->maxRenderSize.height / 2, + 0, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, + L"FSR2_LumaHistory1", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, + L"FSR2_LumaHistory2", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, + contextDescription->displaySize.width, + contextDescription->displaySize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, 
+ {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, + L"FSR2_SpdAtomicCounter", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, + 1, + 1, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_VALUE, sizeof(uint32_t), 0}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, + L"FSR2_DilatedReactiveMasks", + FFX_RESOURCE_TYPE_TEXTURE2D, + (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R8G8_UNORM, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_ALIASABLE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT, + L"FSR2_LanczosLutData", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R16_SNORM, + lanczos2LutWidth, + 1, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, sizeof(lanczos2Weights), lanczos2Weights}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, + L"FSR2_DefaultReactivityMask", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R8_UNORM, + 1, + 1, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_VALUE, sizeof(uint8_t), 0}}, + + {FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, + L"FSR2_MaximumUpsampleBias", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R16_SNORM, + FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH, + FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, sizeof(maximumBias), maximumBias}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, + L"FSR2_DefaultExposure", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R32G32_FLOAT, + 1, + 1, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_VALUE, sizeof(float) * 2, 0}}, + 
+ {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, + L"FSR2_AutoExposure", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32G32_FLOAT, + 1, + 1, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + + // only one for now, will need ping pong to respect the motion vectors + {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, + L"FSR2_AutoReactive", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UNORM, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, + L"FSR2_AutoComposition", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UNORM, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1, + L"FSR2_PrevPreAlpha0", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1, + L"FSR2_PrevPostAlpha0", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2, + L"FSR2_PrevPreAlpha1", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + 
{FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, + + {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2, + L"FSR2_PrevPostAlpha1", + FFX_RESOURCE_TYPE_TEXTURE2D, + FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, + contextDescription->maxRenderSize.width, + contextDescription->maxRenderSize.height, + 1, + FFX_RESOURCE_FLAGS_NONE, + {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED}}, }; // clear the SRV resources to NULL. @@ -588,13 +850,18 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) { - const Fsr2ResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex]; - const FfxResourceType resourceType = currentSurfaceDescription->height > 1 ? FFX_RESOURCE_TYPE_TEXTURE2D : texture1dResourceType; - const FfxResourceDescription resourceDescription = { resourceType, currentSurfaceDescription->format, currentSurfaceDescription->width, currentSurfaceDescription->height, 1, currentSurfaceDescription->mipCount }; + const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex]; + const FfxResourceType resourceType = internalSurfaceDesc[currentSurfaceIndex].type; + const FfxResourceDescription resourceDescription = { resourceType, currentSurfaceDescription->format, currentSurfaceDescription->width, currentSurfaceDescription->height, 1, currentSurfaceDescription->mipCount, FFX_RESOURCE_FLAGS_NONE, currentSurfaceDescription->usage }; const FfxResourceStates initialState = (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) ? 
FFX_RESOURCE_STATE_COMPUTE_READ : FFX_RESOURCE_STATE_UNORDERED_ACCESS; - const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->initDataSize, currentSurfaceDescription->initData, currentSurfaceDescription->name, currentSurfaceDescription->usage, currentSurfaceDescription->id }; - - FFX_VALIDATE(context->contextDescription.callbacks.fpCreateResource(&context->contextDescription.callbacks, &createResourceDescription, &context->srvResources[currentSurfaceDescription->id])); + const FfxCreateResourceDescription createResourceDescription = {FFX_HEAP_TYPE_DEFAULT, + resourceDescription, + initialState, + currentSurfaceDescription->name, + currentSurfaceDescription->id, + currentSurfaceDescription->initData}; + + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[currentSurfaceDescription->id])); } // copy resources to uavResrouces list @@ -602,48 +869,25 @@ static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2Con // avoid compiling pipelines on first render { - context->refreshPipelineStates = false; errorCode = createPipelineStates(context); FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); } return FFX_OK; } -static void fsr2SafeReleasePipeline(FfxFsr2Context_Private* context, FfxPipelineState* pipeline) -{ - FFX_ASSERT(pipeline); - - context->contextDescription.callbacks.fpDestroyPipeline(&context->contextDescription.callbacks, pipeline); -} - -static void fsr2SafeReleaseResource(FfxFsr2Context_Private* context, FfxResourceInternal resource) -{ - context->contextDescription.callbacks.fpDestroyResource(&context->contextDescription.callbacks, resource); -} - -static void fsr2SafeReleaseDevice(FfxFsr2Context_Private* context, FfxDevice* device) -{ - if (*device == nullptr) { - return; - } - - 
context->contextDescription.callbacks.fpDestroyBackendContext(&context->contextDescription.callbacks); - *device = nullptr; -} - static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context) { FFX_ASSERT(context); - fsr2SafeReleasePipeline(context, &context->pipelineDepthClip); - fsr2SafeReleasePipeline(context, &context->pipelineReconstructPreviousDepth); - fsr2SafeReleasePipeline(context, &context->pipelineLock); - fsr2SafeReleasePipeline(context, &context->pipelineAccumulate); - fsr2SafeReleasePipeline(context, &context->pipelineAccumulateSharpen); - fsr2SafeReleasePipeline(context, &context->pipelineRCAS); - fsr2SafeReleasePipeline(context, &context->pipelineComputeLuminancePyramid); - fsr2SafeReleasePipeline(context, &context->pipelineGenerateReactive); - fsr2SafeReleasePipeline(context, &context->pipelineTcrAutogenerate); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineDepthClip, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineReconstructPreviousDepth, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineLock, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulate, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulateSharpen, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineRCAS, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineComputeLuminancePyramid, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGenerateReactive, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, 
&context->pipelineTcrAutogenerate, context->effectContextId); // unregister resources not created internally context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; @@ -658,13 +902,21 @@ static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context) context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL }; + // Release the copy resources for those that had init data + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE], context->effectContextId); + // release internal resources for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_FSR2_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) { - fsr2SafeReleaseResource(context, context->srvResources[currentResourceIndex]); + ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->srvResources[currentResourceIndex], context->effectContextId); } - fsr2SafeReleaseDevice(context, &context->device); + // Destroy the context + 
context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId); return FFX_OK; } @@ -720,52 +972,58 @@ static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr2Context_Private* conte context->constants.deviceToViewDepth[3] = (1.0f / b); } -static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) +static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) { - FfxComputeJobDescription jobDescriptor = {}; + FfxGpuJobDescription dispatchJob = {FFX_GPU_JOB_COMPUTE}; + wcscpy_s(dispatchJob.jobLabel, pipeline->name); - for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) { + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { - const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier; + const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; const FfxResourceInternal currentResource = context->srvResources[currentResourceId]; - jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource; - wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name); + dispatchJob.computeJobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + wcscpy_s(dispatchJob.computeJobDescriptor.srvTextures[currentShaderResourceViewIndex].name, + pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif } - for (uint32_t currentUnorderedAccessViewIndex = 
0; currentUnorderedAccessViewIndex < pipeline->uavCount; ++currentUnorderedAccessViewIndex) { - - const uint32_t currentResourceId = pipeline->uavResourceBindings[currentUnorderedAccessViewIndex].resourceIdentifier; - wcscpy_s(jobDescriptor.uavNames[currentUnorderedAccessViewIndex], pipeline->uavResourceBindings[currentUnorderedAccessViewIndex].name); + for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) { + const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier; +#ifdef FFX_DEBUG + wcscpy_s(dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, + pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name); +#endif if (currentResourceId >= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0 && currentResourceId <= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12) { const FfxResourceInternal currentResource = context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE]; - jobDescriptor.uavs[currentUnorderedAccessViewIndex] = currentResource; - jobDescriptor.uavMip[currentUnorderedAccessViewIndex] = currentResourceId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0; + dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = + currentResourceId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0; } else { const FfxResourceInternal currentResource = context->uavResources[currentResourceId]; - jobDescriptor.uavs[currentUnorderedAccessViewIndex] = currentResource; - jobDescriptor.uavMip[currentUnorderedAccessViewIndex] = 0; + dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + dispatchJob.computeJobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0; } } - 
- jobDescriptor.dimensions[0] = dispatchX; - jobDescriptor.dimensions[1] = dispatchY; - jobDescriptor.dimensions[2] = 1; - jobDescriptor.pipeline = *pipeline; + + dispatchJob.computeJobDescriptor.dimensions[0] = dispatchX; + dispatchJob.computeJobDescriptor.dimensions[1] = dispatchY; + dispatchJob.computeJobDescriptor.dimensions[2] = 1; + dispatchJob.computeJobDescriptor.pipeline = *pipeline; for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { - wcscpy_s( jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name); - jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier]; - jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex; +#ifdef FFX_DEBUG + wcscpy_s(dispatchJob.computeJobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name); +#endif + dispatchJob.computeJobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier]; } - FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; - dispatchJob.computeJobDescriptor = jobDescriptor; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &dispatchJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob); } static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params) @@ -774,37 +1032,28 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D { fsr2DebugCheckDispatch(context, params); } + // take a short cut to the command list FfxCommandList commandList = params->commandList; - // try and refresh shaders first. 
Early exit in case of error. - if (context->refreshPipelineStates) { - - context->refreshPipelineStates = false; - - const FfxErrorCode errorCode = createPipelineStates(context); - FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); - } - if (context->firstExecution) { FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + wcscpy_s(clearJob.jobLabel, L"Zero initialize resource"); const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); } // Prepare per frame descriptor tables const bool isOddFrame = !!(context->resourceFrameIndex & 1); - const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0; - const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex; const uint32_t lockStatusSrvResourceIndex = isOddFrame ? 
FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1; const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2; const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1; @@ -822,9 +1071,9 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D const bool resetAccumulation = params->reset || context->firstExecution; context->firstExecution = false; - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->color, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->depth, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH]); - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->motionVectors, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->color, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->depth, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->motionVectors, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]); // if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends.
if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) { @@ -833,29 +1082,29 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D if (ffxFsr2ResourceIsNull(params->exposure)) { context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE]; } else { - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->exposure, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->exposure, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]); } } - + if (params->enableAutoReactive) { - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->colorOpaqueOnly, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->colorOpaqueOnly, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]); } - + if (ffxFsr2ResourceIsNull(params->reactive)) { context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY]; } else { - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->reactive, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->reactive, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); } - + if (ffxFsr2ResourceIsNull(params->transparencyAndComposition)) {
context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY]; } else { - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->transparencyAndComposition, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->transparencyAndComposition, context->effectContextId, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]); } - context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->output, &context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->output, context->effectContextId, &context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]); context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->srvResources[lockStatusSrvResourceIndex]; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->srvResources[upscaledColorSrvResourceIndex]; context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->uavResources[lockStatusUavResourceIndex]; @@ -875,9 +1124,9 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->uavResources[prevPostAlphaColorUavResourceIndex]; // actual resource size may differ from render/display resolution (e.g.
due to Hw/API restrictions), so query the descriptor for UVs adjustment - const FfxResourceDescription resourceDescInputColor = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); - const FfxResourceDescription resourceDescLockStatus = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[lockStatusSrvResourceIndex]); - const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); + const FfxResourceDescription resourceDescInputColor = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); + const FfxResourceDescription resourceDescLockStatus = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[lockStatusSrvResourceIndex]); + const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D); FFX_ASSERT(resourceDescLockStatus.type == FFX_RESOURCE_TYPE_TEXTURE2D); @@ -956,16 +1205,16 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // reactive mask bias const int32_t threadGroupWorkRegionDim = 8; - const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - const int32_t dispatchSrcY = (context->constants.renderSize[1] + (threadGroupWorkRegionDim - 1)) / 
threadGroupWorkRegionDim; - const int32_t dispatchDstX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; - const int32_t dispatchDstY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchSrcX = FFX_DIVIDE_ROUNDING_UP(context->constants.renderSize[0], threadGroupWorkRegionDim); + const int32_t dispatchSrcY = FFX_DIVIDE_ROUNDING_UP(context->constants.renderSize[1], threadGroupWorkRegionDim); + const int32_t dispatchDstX = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.width, threadGroupWorkRegionDim); + const int32_t dispatchDstY = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.height, threadGroupWorkRegionDim); // Clear reconstructed depth for max depth store. if (resetAccumulation) { FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; - + wcscpy_s(clearJob.jobLabel, L"Zero initialize resource"); // LockStatus resource has no sign bit, callback functions are compensating for this. // Clearing the resource must follow the same logic. 
float clearValuesLockStatus[4]{}; @@ -974,15 +1223,15 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D memcpy(clearJob.clearJobDescriptor.color, clearValuesLockStatus, 4 * sizeof(float)); clearJob.clearJobDescriptor.target = context->srvResources[lockStatusSrvResourceIndex]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); clearJob.clearJobDescriptor.target = context->srvResources[upscaledColorSrvResourceIndex]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); //if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) // Auto exposure always used to track luma changes in locking logic @@ -990,7 +1239,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D const float clearValuesExposure[]{ -1.f, 1e8f, 0.f, 0.f }; memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float)); clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE]; - context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob); + 
context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); } } @@ -999,7 +1248,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D uint32_t workGroupOffset[2]; uint32_t numWorkGroupsAndMips[2]; uint32_t rectInfo[4] = { 0, 0, params->renderSize.width, params->renderSize.height }; - SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); // downsample Fsr2SpdConstants luminancePyramidConstants; @@ -1022,10 +1271,22 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D genReactiveConsts.autoReactiveMax = params->autoReactiveMax; // initialize constantBuffers data - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].data, &context->constants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].uint32Size * sizeof(uint32_t)); - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].data, &luminancePyramidConstants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].uint32Size * sizeof(uint32_t)); - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].data, &rcasConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].uint32Size * sizeof(uint32_t)); - memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].data, &genReactiveConsts, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].uint32Size * sizeof(uint32_t)); + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, + &context->constants, + sizeof(Fsr2Constants), + &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2]); + 
context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, + &luminancePyramidConstants, + sizeof(Fsr2SpdConstants), + &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD]); + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, + &rcasConsts, + sizeof(Fsr2RcasConstants), + &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS]); + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, + &genReactiveConsts, + sizeof(Fsr2GenerateReactiveConstants), + &context->constantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE]); // Auto reactive if (params->enableAutoReactive) @@ -1034,6 +1295,7 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION]; } + scheduleDispatch(context, params, &context->pipelineComputeLuminancePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]); scheduleDispatch(context, params, &context->pipelineReconstructPreviousDepth, dispatchSrcX, dispatchSrcY); scheduleDispatch(context, params, &context->pipelineDepthClip, dispatchSrcX, dispatchSrcY); @@ -1048,8 +1310,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // dispatch RCAS const int32_t threadGroupWorkRegionDimRCAS = 16; - const int32_t dispatchX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS; - const int32_t dispatchY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDimRCAS - 1)) / 
threadGroupWorkRegionDimRCAS; + const int32_t dispatchX = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.width, threadGroupWorkRegionDimRCAS); + const int32_t dispatchY = FFX_DIVIDE_ROUNDING_UP(context->contextDescription.displaySize.height, threadGroupWorkRegionDimRCAS); scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY); } @@ -1058,10 +1320,10 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D // Fsr2MaxQueuedFrames must be an even number. FFX_STATIC_ASSERT((FSR2_MAX_QUEUED_FRAMES & 1) == 0); - context->contextDescription.callbacks.fpExecuteGpuJobs(&context->contextDescription.callbacks, commandList); + context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId); // release dynamic resources - context->contextDescription.callbacks.fpUnregisterResources(&context->contextDescription.callbacks); + context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId); return FFX_OK; } @@ -1080,14 +1342,15 @@ FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextD FFX_ERROR_INVALID_POINTER); // validate that all callbacks are set for the interface - FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); - FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); - FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, 
FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); // if a scratch buffer is declared, then we must have a size - if (contextDescription->callbacks.scratchBuffer) { + if (contextDescription->backendInterface.scratchBuffer) { - FFX_RETURN_ON_ERROR(contextDescription->callbacks.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); } // ensure the context is large enough for the internal context. @@ -1100,6 +1363,21 @@ FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextD return errorCode; } +FFX_API FfxErrorCode ffxFsr2ContextGetGpuMemoryUsage(FfxFsr2Context* context, FfxEffectMemoryUsage* vramUsage) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER); + FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)(context); + + FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); + + FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage( + &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context) { FFX_RETURN_ON_ERROR( @@ -1183,18 +1461,6 @@ FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode( return FFX_OK; } -FfxErrorCode ffxFsr2ContextEnqueueRefreshPipelineRequest(FfxFsr2Context* context) -{ - FFX_RETURN_ON_ERROR( - context, - FFX_ERROR_INVALID_POINTER); - - FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)context; - contextPrivate->refreshPipelineStates = true; - - return FFX_OK; -} - int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth) { const float basePhaseCount = 8.0f; 
@@ -1245,12 +1511,6 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F contextPrivate->device, FFX_ERROR_NULL_DEVICE); - if (contextPrivate->refreshPipelineStates) { - - createPipelineStates(contextPrivate); - contextPrivate->refreshPipelineStates = false; - } - // take a short cut to the command list FfxCommandList commandList = params->commandList; @@ -1264,27 +1524,31 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F FfxResourceInternal internalReactive = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; FfxComputeJobDescription jobDescriptor = {}; - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->colorPreUpscale, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->outReactive, &contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]); - - jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorOpaqueOnly, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorPreUpscale, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); + 
contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->outReactive, contextPrivate->effectContextId, &contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]); - wcscpy_s(jobDescriptor.srvNames[0], pipeline->srvResourceBindings[0].name); - wcscpy_s(jobDescriptor.srvNames[1], pipeline->srvResourceBindings[1].name); - wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name); + jobDescriptor.uavTextures[0].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[0].name, pipeline->srvTextureBindings[0].name); + wcscpy_s(jobDescriptor.srvTextures[1].name, pipeline->srvTextureBindings[1].name); + wcscpy_s(jobDescriptor.uavTextures[0].name, pipeline->uavTextureBindings[0].name); +#endif jobDescriptor.dimensions[0] = dispatchSrcX; jobDescriptor.dimensions[1] = dispatchSrcY; jobDescriptor.dimensions[2] = 1; jobDescriptor.pipeline = *pipeline; - for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) { + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { - const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier; + const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId]; - jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource; - wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name); + jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + 
wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif } Fsr2GenerateReactiveConstants constants = {}; @@ -1293,34 +1557,33 @@ FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const F constants.binaryValue = params->binaryValue; constants.flags = params->flags; - jobDescriptor.cbs[0].uint32Size = sizeof(constants); - memcpy(&jobDescriptor.cbs[0].data, &constants, sizeof(constants)); - wcscpy_s(jobDescriptor.cbNames[0], pipeline->cbResourceBindings[0].name); - + contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&contextPrivate->contextDescription.backendInterface, + &constants, + sizeof(constants), + &jobDescriptor.cbs[0]); +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.cbNames[0], pipeline->constantBufferBindings[0].name); +#endif FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + wcscpy_s(dispatchJob.jobLabel, pipeline->name); dispatchJob.computeJobDescriptor = jobDescriptor; - contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob); + //contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &dispatchJob); - contextPrivate->contextDescription.callbacks.fpExecuteGpuJobs(&contextPrivate->contextDescription.callbacks, commandList); + contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs( + &contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId); // restore internal reactive contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE] = internalReactive; + // release dynamic resources + contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId); + return FFX_OK; } static FfxErrorCode 
generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params) { - if (contextPrivate->refreshPipelineStates) { - - createPipelineStates(contextPrivate); - contextPrivate->refreshPipelineStates = false; - } - - // take a short cut to the command list - FfxCommandList commandList = params->commandList; - FfxPipelineState* pipeline = &contextPrivate->pipelineTcrAutogenerate; const int32_t threadGroupWorkRegionDim = 8; @@ -1328,42 +1591,60 @@ static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* context const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; FfxComputeJobDescription jobDescriptor = {}; - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); - contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, ¶ms->color, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); - - jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; - jobDescriptor.uavs[1] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION]; - jobDescriptor.uavs[2] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]; - jobDescriptor.uavs[3] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR]; - - wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name); - wcscpy_s(jobDescriptor.uavNames[1], pipeline->uavResourceBindings[1].name); - wcscpy_s(jobDescriptor.uavNames[2], pipeline->uavResourceBindings[2].name); - wcscpy_s(jobDescriptor.uavNames[3], pipeline->uavResourceBindings[3].name); + 
contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorOpaqueOnly, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->color, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]); + + jobDescriptor.uavTextures[0].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + jobDescriptor.uavTextures[1].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION]; + jobDescriptor.uavTextures[2].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]; + jobDescriptor.uavTextures[3].resource = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR]; + +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.uavTextures[0].name, pipeline->uavTextureBindings[0].name); + wcscpy_s(jobDescriptor.uavTextures[1].name, pipeline->uavTextureBindings[1].name); + wcscpy_s(jobDescriptor.uavTextures[2].name, pipeline->uavTextureBindings[2].name); + wcscpy_s(jobDescriptor.uavTextures[3].name, pipeline->uavTextureBindings[3].name); +#endif jobDescriptor.dimensions[0] = dispatchSrcX; jobDescriptor.dimensions[1] = dispatchSrcY; jobDescriptor.dimensions[2] = 1; jobDescriptor.pipeline = *pipeline; - for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) { + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { - const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier; + const uint32_t currentResourceId = 
pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId]; - jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource; - wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name); + jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif } for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { - wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name); - jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier]; - jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name); +#endif + jobDescriptor.cbs[currentRootConstantIndex] = contextPrivate->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier]; + //jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->constantBufferBindings[currentRootConstantIndex].slotIndex; } FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + wcscpy_s(dispatchJob.jobLabel, pipeline->name); dispatchJob.computeJobDescriptor = jobDescriptor; - contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob); + 
contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &dispatchJob); + + return FFX_OK; +} + +FFX_API FfxVersionNumber ffxFsr2GetEffectVersion() +{ + return FFX_SDK_MAKE_VERSION(FFX_FSR2_VERSION_MAJOR, FFX_FSR2_VERSION_MINOR, FFX_FSR2_VERSION_PATCH); +} +FFX_API FfxErrorCode ffxFsr2SetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel) +{ + ffxSetPrintMessageCallback(fpMessage, debugLevel); return FFX_OK; } diff --git a/thirdparty/amd-fsr2/ffx_fsr2.h b/thirdparty/amd-ffx/ffx_fsr2.h similarity index 77% rename from thirdparty/amd-fsr2/ffx_fsr2.h rename to thirdparty/amd-ffx/ffx_fsr2.h index dfcd4caf3503..86d89b3166ef 100644 --- a/thirdparty/amd-fsr2/ffx_fsr2.h +++ b/thirdparty/amd-ffx/ffx_fsr2.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,38 +20,80 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
- -// @defgroup FSR2 - #pragma once // Include the interface for the backend of the FSR2 API. -#include "ffx_fsr2_interface.h" +#include "ffx_interface.h" + +/// @defgroup ffxFsr2 FidelityFX FSR2 +/// FidelityFX Super Resolution 2 runtime library +/// +/// @ingroup SDKComponents /// FidelityFX Super Resolution 2 major version. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 #define FFX_FSR2_VERSION_MAJOR (2) /// FidelityFX Super Resolution 2 minor version. /// -/// @ingroup FSR2 -#define FFX_FSR2_VERSION_MINOR (2) +/// @ingroup ffxFsr2 +#define FFX_FSR2_VERSION_MINOR (3) /// FidelityFX Super Resolution 2 patch version. /// -/// @ingroup FSR2 -#define FFX_FSR2_VERSION_PATCH (1) +/// @ingroup ffxFsr2 +#define FFX_FSR2_VERSION_PATCH (3) + +/// FidelityFX Super Resolution 2 context count +/// +/// Defines the number of internal effect contexts required by FSR2 +/// +/// @ingroup ffxFsr2 +#define FFX_FSR2_CONTEXT_COUNT 1 /// The size of the context specified in 32bit values. /// -/// @ingroup FSR2 -#define FFX_FSR2_CONTEXT_SIZE (16536) +/// @ingroup ffxFsr2 +#define FFX_FSR2_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE) #if defined(__cplusplus) extern "C" { #endif // #if defined(__cplusplus) +/// An enumeration of all the passes which constitute the FSR2 algorithm. +/// +/// FSR2 is implemented as a composite of several compute passes each +/// computing a key part of the final result. Each call to the +/// FfxFsr2ScheduleGpuJobFunc callback function will +/// correspond to a single pass included in FfxFsr2Pass. For a +/// more comprehensive description of each pass, please refer to the FSR2 +/// reference documentation. +/// +/// Please note in some cases e.g.: FFX_FSR2_PASS_ACCUMULATE +/// and FFX_FSR2_PASS_ACCUMULATE_SHARPEN either one pass or the +/// other will be used (they are mutually exclusive). 
The choice of which will +/// depend on the way the FfxFsr2Context is created and the +/// precise contents of FfxFsr2DispatchParamters each time a call +/// is made to ffxFsr2ContextDispatch. +/// +/// @ingroup ffxFsr2 +typedef enum FfxFsr2Pass +{ + + FFX_FSR2_PASS_DEPTH_CLIP = 0, ///< A pass which performs depth clipping. + FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1, ///< A pass which performs reconstruction of previous frame's depth. + FFX_FSR2_PASS_LOCK = 2, ///< A pass which calculates pixel locks. + FFX_FSR2_PASS_ACCUMULATE = 3, ///< A pass which performs upscaling. + FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4, ///< A pass which performs upscaling when sharpening is used. + FFX_FSR2_PASS_RCAS = 5, ///< A pass which performs sharpening. + FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6, ///< A pass which generates the luminance mipmap chain for the current frame. + FFX_FSR2_PASS_GENERATE_REACTIVE = 7, ///< An optional pass to generate a reactive mask. + FFX_FSR2_PASS_TCR_AUTOGENERATE = 8, ///< An optional pass to automatically generate transparency/composition and reactive masks. + + FFX_FSR2_PASS_COUNT ///< The number of passes performed by FSR2. +} FfxFsr2Pass; + /// An enumeration of all the quality modes supported by FidelityFX Super /// Resolution 2 upscaling. /// @@ -70,7 +113,7 @@ extern "C" { /// of using this scaling mode before deciding if you should include it in your /// application. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 typedef enum FfxFsr2QualityMode { FFX_FSR2_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x. @@ -82,7 +125,7 @@ typedef enum FfxFsr2QualityMode { /// An enumeration of bit flags used when creating a /// FfxFsr2Context. See FfxFsr2ContextDescription. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 typedef enum FfxFsr2InitializationFlagBits { FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided is using a high-dynamic range. 
@@ -96,25 +139,36 @@ typedef enum FfxFsr2InitializationFlagBits { FFX_FSR2_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues. } FfxFsr2InitializationFlagBits; +/// Pass a string message +/// +/// Used for debug messages. +/// +/// @param [in] type The type of message. +/// @param [in] message A string message to pass. +/// +/// +/// @ingroup ffxFsr2 +typedef void(*FfxFsr2Message)( + FfxMsgType type, + const wchar_t* message); + /// A structure encapsulating the parameters required to initialize FidelityFX /// Super Resolution 2 upscaling. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 typedef struct FfxFsr2ContextDescription { uint32_t flags; ///< A collection of FfxFsr2InitializationFlagBits. FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. FfxDimensions2D displaySize; ///< The size of the presentation resolution targeted by the upscaling process. - FfxFsr2Interface callbacks; ///< A set of pointers to the backend implementation for FSR 2.0. - FfxDevice device; ///< The abstracted device which is passed to some callback functions. - - FfxFsr2Message fpMessage; ///< A pointer to a function that can recieve messages from the runtime. + FfxFsr2Message fpMessage; ///< A pointer to a function that can receive messages from the runtime. + FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK } FfxFsr2ContextDescription; /// A structure encapsulating the parameters for dispatching the various passes /// of FidelityFX Super Resolution 2. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 typedef struct FfxFsr2DispatchDescription { FfxCommandList commandList; ///< The FfxCommandList to record FSR2 rendering commands into. 
@@ -146,12 +200,15 @@ typedef struct FfxFsr2DispatchDescription { float autoReactiveScale; ///< A value to scale the reactive mask float autoReactiveMax; ///< A value to clamp the reactive mask - float reprojectionMatrix[16]; ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth. + // GODOT BEGINS + float reprojectionMatrix[16]; + // GODOT ENDS + } FfxFsr2DispatchDescription; /// A structure encapsulating the parameters for automatic generation of a reactive mask /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 typedef struct FfxFsr2GenerateReactiveDescription { FfxCommandList commandList; ///< The FfxCommandList to record FSR2 rendering commands into. @@ -176,12 +233,13 @@ typedef struct FfxFsr2GenerateReactiveDescription { /// It is therefore recommended that the GPU is idle before destroying the /// FSR2 context. /// -/// @ingroup FSR2 -typedef struct FfxFsr2Context { - - uint32_t data[FFX_FSR2_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context. +/// @ingroup ffxFsr2 +typedef struct FfxFsr2Context +{ + uint32_t data[FFX_FSR2_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context. } FfxFsr2Context; + /// Create a FidelityFX Super Resolution 2 context from the parameters /// programmed to the FfxFsr2CreateParams structure. /// @@ -213,8 +271,8 @@ typedef struct FfxFsr2Context { /// disabled by a user. To destroy the FSR2 context you should call /// ffxFsr2ContextDestroy. /// -/// @param [out] context A pointer to a FfxFsr2Context structure to populate. -/// @param [in] contextDescription A pointer to a FfxFsr2ContextDescription structure. +/// @param [out] pContext A pointer to a FfxFsr2Context structure to populate. +/// @param [in] pContextDescription A pointer to a FfxFsr2ContextDescription structure. /// /// @retval /// FFX_OK The operation completed successfully. 
@@ -225,8 +283,21 @@ typedef struct FfxFsr2Context { /// @retval /// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. /// -/// @ingroup FSR2 -FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription); +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* pContext, const FfxFsr2ContextDescription* pContextDescription); + +/// Get GPU memory usage of the FidelityFX Super Resolution context. +/// +/// @param [in] pContext A pointer to a FfxFsr2Context structure. +/// @param [out] pVramUsage A pointer to a FfxEffectMemoryUsage structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or vramUsage were NULL. +/// +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2ContextGetGpuMemoryUsage(FfxFsr2Context* pContext, FfxEffectMemoryUsage* pVramUsage); /// Dispatch the various passes that constitute FidelityFX Super Resolution 2. /// @@ -252,8 +323,8 @@ FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2 /// documentation for ffxFsr2GetJitterOffset as well as the /// accompanying overview documentation for FSR2. /// -/// @param [in] context A pointer to a FfxFsr2Context structure. -/// @param [in] dispatchDescription A pointer to a FfxFsr2DispatchDescription structure. +/// @param [in] pContext A pointer to a FfxFsr2Context structure. +/// @param [in] pDispatchDescription A pointer to a FfxFsr2DispatchDescription structure. /// /// @retval /// FFX_OK The operation completed successfully. @@ -266,31 +337,31 @@ FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2 /// @retval /// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. 
/// -/// @ingroup FSR2 -FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchDescription); +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* pContext, const FfxFsr2DispatchDescription* pDispatchDescription); /// A helper function generate a Reactive mask from an opaque only texure and one containing translucent objects. /// -/// @param [in] context A pointer to a FfxFsr2Context structure. -/// @param [in] params A pointer to a FfxFsr2GenerateReactiveDescription structure +/// @param [in] pContext A pointer to a FfxFsr2Context structure. +/// @param [in] pParams A pointer to a FfxFsr2GenerateReactiveDescription structure /// /// @retval /// FFX_OK The operation completed successfully. /// -/// @ingroup FSR2 -FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params); +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* pContext, const FfxFsr2GenerateReactiveDescription* pParams); /// Destroy the FidelityFX Super Resolution context. /// -/// @param [out] context A pointer to a FfxFsr2Context structure to destroy. +/// @param [out] pContext A pointer to a FfxFsr2Context structure to destroy. /// /// @retval /// FFX_OK The operation completed successfully. /// @retval /// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context was NULL. /// -/// @ingroup FSR2 -FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context); +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* pContext); /// Get the upscale ratio from the quality mode. /// @@ -312,7 +383,7 @@ FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context); /// The upscaling the per-dimension upscaling ratio for /// qualityMode according to the table above. 
/// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode); /// A helper function to calculate the rendering resolution from a target @@ -321,8 +392,8 @@ FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMo /// This function applies the scaling factor returned by /// ffxFsr2GetUpscaleRatioFromQualityMode to each dimension. /// -/// @param [out] renderWidth A pointer to a uint32_t which will hold the calculated render resolution width. -/// @param [out] renderHeight A pointer to a uint32_t which will hold the calculated render resolution height. +/// @param [out] pRenderWidth A pointer to a uint32_t which will hold the calculated render resolution width. +/// @param [out] pRenderHeight A pointer to a uint32_t which will hold the calculated render resolution height. /// @param [in] displayWidth The target display resolution width. /// @param [in] displayHeight The target display resolution height. /// @param [in] qualityMode The desired quality mode for FSR 2 upscaling. @@ -334,10 +405,10 @@ FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMo /// @retval /// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode( - uint32_t* renderWidth, - uint32_t* renderHeight, + uint32_t* pRenderWidth, + uint32_t* pRenderHeight, uint32_t displayWidth, uint32_t displayHeight, FfxFsr2QualityMode qualityMode); @@ -348,7 +419,7 @@ FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode( /// For more detailed information about the application of camera jitter to /// your application's rendering please refer to the /// ffxFsr2GetJitterOffset function. -/// +/// /// The table below shows the jitter phase count which this function /// would return for each of the quality presets. 
/// @@ -366,7 +437,7 @@ FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode( /// @returns /// The jitter phase count for the scaling factor between renderWidth and displayWidth. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth); /// A helper function to calculate the subpixel jitter offset. @@ -398,12 +469,12 @@ FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayW /// float jitterX = 0; /// float jitterY = 0; /// ffxFsr2GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount); -/// +/// /// const float jitterX = 2.0f * jitterX / (float)renderWidth; /// const float jitterY = -2.0f * jitterY / (float)renderHeight; /// const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(jitterX, jitterY, 0)); /// const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix; -/// +/// /// Jitter should be applied to all rendering. This includes opaque, alpha /// transparent, and raytraced objects. For rasterized objects, the sub-pixel /// jittering values calculated by the iffxFsr2GetJitterOffset @@ -411,31 +482,31 @@ FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayW /// used to perform transformations during vertex shading. For raytraced /// rendering, the sub-pixel jitter should be applied to the ray's origin, /// often the camera's position. -/// +/// /// Whether you elect to use the ffxFsr2GetJitterOffset function /// or your own sequence generator, you must program the /// jitterOffset field of the /// FfxFsr2DispatchParameters structure in order to inform FSR2 /// of the jitter offset that has been applied in order to render each frame. -/// +/// /// If not using the recommended ffxFsr2GetJitterOffset function, /// care should be taken that your jitter sequence never generates a null vector; /// that is value of 0 in both the X and Y dimensions. 
/// -/// @param [out] outX A pointer to a float which will contain the subpixel jitter offset for the x dimension. -/// @param [out] outY A pointer to a float which will contain the subpixel jitter offset for the y dimension. +/// @param [out] pOutX A pointer to a float which will contain the subpixel jitter offset for the x dimension. +/// @param [out] pOutY A pointer to a float which will contain the subpixel jitter offset for the y dimension. /// @param [in] index The index within the jitter sequence. /// @param [in] phaseCount The length of jitter phase. See ffxFsr2GetJitterPhaseCount. -/// +/// /// @retval /// FFX_OK The operation completed successfully. /// @retval /// FFX_ERROR_INVALID_POINTER Either outX or outY was NULL. /// @retval /// FFX_ERROR_INVALID_ARGUMENT Argument phaseCount must be greater than 0. -/// -/// @ingroup FSR2 -FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount); +/// +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* pOutX, float* pOutY, int32_t index, int32_t phaseCount); /// A helper function to check if a resource is /// FFX_FSR2_RESOURCE_IDENTIFIER_NULL. @@ -447,9 +518,26 @@ FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t in /// @returns /// false The resource was FFX_FSR2_RESOURCE_IDENTIFIER_NULL. /// -/// @ingroup FSR2 +/// @ingroup ffxFsr2 FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource); +/// Queries the effect version number. +/// +/// @returns +/// The SDK version the effect was built with. +/// +/// @ingroup ffxFsr2 +FFX_API FfxVersionNumber ffxFsr2GetEffectVersion(); + +/// Set global debug message settings +/// +/// @param [in] fpMessage A ffxMessageCallback +/// @param [in] debugLevel An unsigned integer. Unimplemented. +/// @retval +/// FFX_OK The operation completed successfully. 
+/// +/// @ingroup ffxFsr2 +FFX_API FfxErrorCode ffxFsr2SetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel); #if defined(__cplusplus) } #endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h b/thirdparty/amd-ffx/ffx_fsr2_maximum_bias.h similarity index 89% rename from thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h rename to thirdparty/amd-ffx/ffx_fsr2_maximum_bias.h index 5fdbd0cdcd00..a6847e357f71 100644 --- a/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h +++ b/thirdparty/amd-ffx/ffx_fsr2_maximum_bias.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software.
// @@ -23,8 +24,8 @@ #pragma once -static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16; -static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16; +static const int32_t FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16; +static const int32_t FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16; static const float ffxFsr2MaximumBias[] = { 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.876f, 1.809f, 1.772f, 1.753f, 1.748f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f, 1.869f, 1.801f, 1.764f, 1.745f, 1.739f, diff --git a/thirdparty/amd-fsr2/ffx_fsr2_private.h b/thirdparty/amd-ffx/ffx_fsr2_private.h similarity index 65% rename from thirdparty/amd-fsr2/ffx_fsr2_private.h rename to thirdparty/amd-ffx/ffx_fsr2_private.h index 8a9aec577886..d2840aae7bc7 100644 --- a/thirdparty/amd-fsr2/ffx_fsr2_private.h +++ b/thirdparty/amd-ffx/ffx_fsr2_private.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -20,8 +21,29 @@ // THE SOFTWARE. 
#pragma once +#include "gpu/fsr2/ffx_fsr2_resources.h" + +/// An enumeration of all the permutations that can be passed to the FSR2 algorithm. +/// +/// FSR2 features are organized through a set of pre-defined compile +/// permutation options that need to be specified. Which shader blob +/// is returned for pipeline creation will be determined by what combination +/// of shader permutations are enabled. +/// +/// @ingroup FSR2 +typedef enum Fs2ShaderPermutationOptions +{ + FSR2_SHADER_PERMUTATION_USE_LANCZOS_TYPE = (1 << 0), ///< Off means reference, On means LUT + FSR2_SHADER_PERMUTATION_HDR_COLOR_INPUT = (1 << 1), ///< Enables the HDR code path + FSR2_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS = (1 << 2), ///< Indicates low resolution motion vectors provided + FSR2_SHADER_PERMUTATION_JITTER_MOTION_VECTORS = (1 << 3), ///< Indicates motion vectors were generated with jitter + FSR2_SHADER_PERMUTATION_DEPTH_INVERTED = (1 << 4), ///< Indicates input resources were generated with inverted depth + FSR2_SHADER_PERMUTATION_ENABLE_SHARPENING = (1 << 5), ///< Enables a supplementary sharpening pass + FSR2_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 6), ///< doesn't map to a define, selects different table + FSR2_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 7), ///< Enables fast math computations where possible +} Fs2ShaderPermutationOptions; -// Constants for FSR2 DX12 dispatches. Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h +// Constants for FSR2 dispatches. 
Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h typedef struct Fsr2Constants { int32_t renderSize[2]; @@ -31,7 +53,7 @@ typedef struct Fsr2Constants { int32_t lumaMipDimensions[2]; int32_t lumaMipLevelToUse; int32_t frameIndex; - + float deviceToViewDepth[4]; float jitterOffset[2]; float motionVectorScale[2]; @@ -45,20 +67,22 @@ typedef struct Fsr2Constants { float dynamicResChangeFactor; float viewSpaceToMetersFactor; + // GODOT BEGINS float pad; float reprojectionMatrix[16]; + // GODOT ENDS } Fsr2Constants; struct FfxFsr2ContextDescription; struct FfxDeviceCapabilities; struct FfxPipelineState; -struct FfxResource; // FfxFsr2Context_Private // The private implementation of the FSR2 context. typedef struct FfxFsr2Context_Private { FfxFsr2ContextDescription contextDescription; + FfxUInt32 effectContextId; Fsr2Constants constants; FfxDevice device; FfxDeviceCapabilities deviceCapabilities; @@ -71,13 +95,13 @@ typedef struct FfxFsr2Context_Private { FfxPipelineState pipelineComputeLuminancePyramid; FfxPipelineState pipelineGenerateReactive; FfxPipelineState pipelineTcrAutogenerate; + FfxConstantBuffer constantBuffers[4]; // 2 arrays of resources, as e.g. FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV FfxResourceInternal srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT]; FfxResourceInternal uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT]; bool firstExecution; - bool refreshPipelineStates; uint32_t resourceFrameIndex; float previousJitterOffset[2]; int32_t jitterPhaseCountRemaining; diff --git a/thirdparty/amd-ffx/ffx_fsr3.cpp b/thirdparty/amd-ffx/ffx_fsr3.cpp new file mode 100644 index 000000000000..01aaac7ca6fc --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr3.cpp @@ -0,0 +1,589 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include // for max used inside SPD CPU code. +#include // for fabs, abs, sinf, sqrt, etc. 
+#include // for memset +#include // for FLT_EPSILON +#include "ffx_fsr3.h" +#include "ffx_fsr3upscaler.h" +#define FFX_CPU +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wsign-compare" +#endif + +#include "gpu/ffx_core.h" +#include "gpu/fsr3/ffx_fsr3_resources.h" +#include "ffx_object_management.h" +#include "ffx_frameinterpolation_private.h" + +#include "ffx_fsr3_private.h" + +// To track only one context is present, also used in fi dispatch callback +static FfxFsr3Context* s_Context = nullptr; + +FfxErrorCode ffxFsr3ContextCreate(FfxFsr3Context* context, FfxFsr3ContextDescription* contextDescription) +{ + FFX_STATIC_ASSERT(sizeof(FfxFsr3Context) >= sizeof(FfxFsr3Context_Private)); + FfxErrorCode ret = FFX_OK; + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + // Prepare backend + memset(context, 0, sizeof(FfxFsr3Context_Private)); + + // check pointers are valid. + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + contextDescription, + FFX_ERROR_INVALID_POINTER); + + contextPrivate->description = *contextDescription; + + contextPrivate->backendInterfaceSharedResources = contextDescription->backendInterfaceSharedResources; + contextPrivate->backendInterfaceUpscaling = contextDescription->backendInterfaceUpscaling; + contextPrivate->backendInterfaceFrameInterpolation = contextDescription->backendInterfaceFrameInterpolation; + + bool upscalingOnly = (contextDescription->flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0; + bool interpolationOnly = (contextDescription->flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0; + contextPrivate->asyncWorkloadSupported = (contextDescription->flags & FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT) != 0; + contextPrivate->sharedResourceCount = contextPrivate->asyncWorkloadSupported ? 
FSR3_MAX_QUEUED_FRAMES : 1; + + // ensure upscalingOnly and interpolationOnly are not set simultaneously + FFX_ASSERT(upscalingOnly == false || interpolationOnly == false); + + // validate that all callbacks are set for the backend interfaces + if (contextPrivate->interpolationOnly) + { + const FfxUInt32 numBackendsToVerify = 2; + FfxInterface* backendsToVerify[] = { &contextPrivate->backendInterfaceSharedResources, + &contextPrivate->backendInterfaceFrameInterpolation }; + + for (FfxUInt32 i = 0; i < numBackendsToVerify; i++) + { + FfxInterface* backend = backendsToVerify[i]; + FFX_RETURN_ON_ERROR(backend, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + + // if a scratch buffer is declared, then we must have a size + if (backend->scratchBuffer) + { + FFX_RETURN_ON_ERROR(backend->scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + } + } + } + else + { + const FfxUInt32 numBackendsToVerify = contextPrivate->upscalingOnly ? 
1 : 3; + FfxInterface* backendsToVerify[] = { &contextPrivate->backendInterfaceUpscaling, + &contextPrivate->backendInterfaceSharedResources, + &contextPrivate->backendInterfaceFrameInterpolation }; + + for (FfxUInt32 i = 0; i < numBackendsToVerify; i++) + { + FfxInterface* backend = backendsToVerify[i]; + FFX_RETURN_ON_ERROR(backend, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(backend->fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + + // if a scratch buffer is declared, then we must have a size + if (backend->scratchBuffer) + { + FFX_RETURN_ON_ERROR(backend->scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + } + } + } + + if (!contextPrivate->upscalingOnly) + { + FFX_VALIDATE(contextPrivate->backendInterfaceSharedResources.fpCreateBackendContext(&contextPrivate->backendInterfaceSharedResources, + FFX_EFFECT_SHAREDRESOURCES, + nullptr, + &contextPrivate->effectContextIdSharedResources)); + } + else + { + contextPrivate->backendInterfaceSharedResources = contextPrivate->backendInterfaceUpscaling; + contextDescription->backendInterfaceSharedResources = contextDescription->backendInterfaceUpscaling; + } + + // set up FSR3 Upscaler + // ensure we're actually creating an FSR3 Upscaler context, not the creationfunction that reroutes to ffxFsr3ContextCreate + if (!interpolationOnly) + { + FfxFsr3UpscalerContextDescription upDesc = {}; + upDesc.flags = 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_HIGH_DYNAMIC_RANGE) ? FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? 
FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) ? FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INVERTED) ? FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INFINITE) ? FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_AUTO_EXPOSURE) ? FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DYNAMIC_RESOLUTION) ? FFX_FSR3UPSCALER_ENABLE_DYNAMIC_RESOLUTION : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEBUG_CHECKING) ? FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING : 0; + upDesc.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_HDR_UPSCALE_SDR_FINALOUTPUT) ? FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE : 0; + upDesc.maxRenderSize = contextDescription->maxRenderSize; + upDesc.maxUpscaleSize = contextDescription->maxUpscaleSize; + upDesc.backendInterface = contextDescription->backendInterfaceUpscaling; + upDesc.fpMessage = contextDescription->fpMessage; + FFX_VALIDATE(ffxFsr3UpscalerContextCreate(&contextPrivate->upscalerContext, &upDesc)); + } + + if (!upscalingOnly) + { + + FfxOpticalflowContextDescription ofDescription = {}; + ofDescription.backendInterface = contextDescription->backendInterfaceFrameInterpolation; + ofDescription.resolution = contextDescription->displaySize; + + // set up Opticalflow + FFX_VALIDATE(ffxOpticalflowContextCreate(&contextPrivate->ofContext, &ofDescription)); + + FfxFrameInterpolationContextDescription fiDescription = {}; + fiDescription.backendInterface = contextDescription->backendInterfaceFrameInterpolation; + fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? 
FFX_FRAMEINTERPOLATION_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS : 0; + fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) ? FFX_FRAMEINTERPOLATION_ENABLE_JITTER_MOTION_VECTORS : 0; + fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INVERTED) ? FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INVERTED : 0; + fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_DEPTH_INFINITE) ? FFX_FRAMEINTERPOLATION_ENABLE_DEPTH_INFINITE : 0; + fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_HIGH_DYNAMIC_RANGE) ? FFX_FRAMEINTERPOLATION_ENABLE_HDR_COLOR_INPUT : 0; + fiDescription.flags |= (contextDescription->flags & FFX_FSR3_ENABLE_SDR_UPSCALE_HDR_FINALOUTPUT) ? FFX_FRAMEINTERPOLATION_ENABLE_HDR_COLOR_INPUT : 0; + fiDescription.flags |= contextPrivate->asyncWorkloadSupported ? FFX_FRAMEINTERPOLATION_ENABLE_ASYNC_SUPPORT : 0; + fiDescription.maxRenderSize = contextDescription->maxRenderSize; + fiDescription.displaySize = contextDescription->displaySize; + fiDescription.backBufferFormat = contextDescription->backBufferFormat; + // This is a new item exposed only through ffx API on PC + fiDescription.previousInterpolationSourceFormat = contextDescription->backBufferFormat; + + // set up Frameinterpolation + FFX_VALIDATE(ffxFrameInterpolationContextCreate(&contextPrivate->fiContext, &fiDescription)); + contextPrivate->effectContextIdFrameGeneration = reinterpret_cast(&contextPrivate->fiContext)->effectContextId; + + // set up optical flow resources + FfxOpticalflowSharedResourceDescriptions ofResourceDescs = {}; + FFX_VALIDATE(ffxOpticalflowGetSharedResourceDescriptions(&contextPrivate->ofContext, &ofResourceDescs)); + + FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource( + &contextDescription->backendInterfaceSharedResources, &ofResourceDescs.opticalFlowVector, contextPrivate->effectContextIdSharedResources, 
&contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR])); + FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource( + &contextDescription->backendInterfaceSharedResources, &ofResourceDescs.opticalFlowSCD, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT])); + } + + // set up FSR3Upscaler resources + if (!contextPrivate->interpolationOnly) + { + FfxFsr3UpscalerSharedResourceDescriptions fs3UpscalerResourceDescs = {}; + FFX_VALIDATE(ffxFsr3UpscalerGetSharedResourceDescriptions(&contextPrivate->upscalerContext, &fs3UpscalerResourceDescs)); + + wchar_t Name[256] = {}; + for (FfxUInt32 i = 0; i < contextPrivate->sharedResourceCount; i++) + { + FfxCreateResourceDescription dilD = fs3UpscalerResourceDescs.dilatedDepth; + swprintf(Name, 255, L"%s%d", fs3UpscalerResourceDescs.dilatedDepth.name, i); + dilD.name = Name; + FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource( + &contextDescription->backendInterfaceSharedResources, &dilD, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 + (i * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)])); + + FfxCreateResourceDescription dilMVs = fs3UpscalerResourceDescs.dilatedMotionVectors; + swprintf(Name, 255, L"%s%d", fs3UpscalerResourceDescs.dilatedMotionVectors.name, i); + dilMVs.name = Name; + FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource( + &contextDescription->backendInterfaceSharedResources, &dilMVs, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 + (i * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)])); + + FfxCreateResourceDescription recND = fs3UpscalerResourceDescs.reconstructedPrevNearestDepth; + swprintf(Name, 255, L"%s%d", 
fs3UpscalerResourceDescs.reconstructedPrevNearestDepth.name, i); + recND.name = Name; + FFX_VALIDATE(contextDescription->backendInterfaceSharedResources.fpCreateResource( + &contextDescription->backendInterfaceSharedResources, &recND, contextPrivate->effectContextIdSharedResources, &contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 + (i * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)])); + } + } + + return ret; +} + +FfxErrorCode ffxFsr3ContextGetGpuMemoryUsage( + FfxFsr3Context* context, + FfxEffectMemoryUsage* pUpscalerUsage, + FfxEffectMemoryUsage* pOpticalFlowUsage, + FfxEffectMemoryUsage* pFrameGenerationUsage) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + if (nullptr != pUpscalerUsage) + { + memset(pUpscalerUsage, 0, sizeof(FfxEffectMemoryUsage)); + ffxFsr3UpscalerContextGetGpuMemoryUsage(&contextPrivate->upscalerContext, pUpscalerUsage); + } + + + if (nullptr != pOpticalFlowUsage) + { + memset(pOpticalFlowUsage, 0, sizeof(FfxEffectMemoryUsage)); + ffxOpticalflowContextGetGpuMemoryUsage(&contextPrivate->ofContext, pOpticalFlowUsage); + } + + if (nullptr != pFrameGenerationUsage) + { + memset(pFrameGenerationUsage, 0, sizeof(FfxEffectMemoryUsage)); + ffxFrameInterpolationContextGetGpuMemoryUsage(&contextPrivate->fiContext, pFrameGenerationUsage); + } + + return FFX_OK; +} + +FfxErrorCode ffxFsr3ContextGenerateReactiveMask(FfxFsr3Context* context, const FfxFsr3GenerateReactiveDescription* params) +{ + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + FfxFsr3UpscalerGenerateReactiveDescription fsr3Params{}; + + fsr3Params.commandList = params->commandList; + fsr3Params.colorOpaqueOnly = params->colorOpaqueOnly; + fsr3Params.colorPreUpscale = params->colorPreUpscale; + fsr3Params.outReactive = params->outReactive; + fsr3Params.renderSize = params->renderSize; + fsr3Params.scale = 
params->scale; + fsr3Params.cutoffThreshold = params->cutoffThreshold; + fsr3Params.binaryValue = params->binaryValue; + fsr3Params.flags = params->flags; + + return ffxFsr3UpscalerContextGenerateReactiveMask(&contextPrivate->upscalerContext, &fsr3Params); +} + +FfxErrorCode ffxFsr3DispatchFrameGeneration(const FfxFrameGenerationDispatchDescription* callbackDesc) +{ + FfxErrorCode errorCode = FFX_OK; + + FFX_RETURN_ON_ERROR(s_Context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(callbackDesc, FFX_ERROR_INVALID_POINTER); + + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(s_Context); + + bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0; + FFX_ASSERT_MESSAGE(upscalingOnly == false, "Fsr3 context has not been initialized to support Frame Generation"); + + const FfxFrameInterpolationPrepareDescription* prepareDesc = &contextPrivate->fgPrepareDescriptions[callbackDesc->frameID & 1]; + + // Optical flow + { + FfxOpticalflowDispatchDescription ofDispatchDesc{}; + ofDispatchDesc.commandList = callbackDesc->commandList; + ofDispatchDesc.color = callbackDesc->presentColor; + if (contextPrivate->HUDLess_color.resource) + { + ofDispatchDesc.color = contextPrivate->HUDLess_color; + } + ofDispatchDesc.reset = callbackDesc->reset; + ofDispatchDesc.backbufferTransferFunction = callbackDesc->backBufferTransferFunction; + ofDispatchDesc.minMaxLuminance.x = callbackDesc->minMaxLuminance[0]; + ofDispatchDesc.minMaxLuminance.y = callbackDesc->minMaxLuminance[1]; + ofDispatchDesc.opticalFlowVector = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]); + ofDispatchDesc.opticalFlowSCD = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, 
contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT]); + + errorCode |= ffxOpticalflowContextDispatch(&contextPrivate->ofContext, &ofDispatchDesc); + } + + // Frame interpolation + { + FfxFrameInterpolationDispatchDescription fiDispatchDesc{0}; + + // don't dispatch interpolation async for now: use the same commandlist for copy and interpolate + fiDispatchDesc.commandList = callbackDesc->commandList; + fiDispatchDesc.displaySize.width = callbackDesc->presentColor.description.width; + fiDispatchDesc.displaySize.height = callbackDesc->presentColor.description.height; + fiDispatchDesc.currentBackBuffer = callbackDesc->presentColor; + fiDispatchDesc.currentBackBuffer_HUDLess = contextPrivate->HUDLess_color; + + fiDispatchDesc.renderSize = prepareDesc->renderSize; + fiDispatchDesc.output = callbackDesc->outputs[0]; + fiDispatchDesc.opticalFlowVector = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR]); + fiDispatchDesc.opticalFlowSceneChangeDetection = contextPrivate->backendInterfaceSharedResources.fpGetResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT]); + fiDispatchDesc.opticalFlowBlockSize = 8; + fiDispatchDesc.opticalFlowScale = { 1.f / fiDispatchDesc.displaySize.width, 1.f / fiDispatchDesc.displaySize.height }; + fiDispatchDesc.frameTimeDelta = prepareDesc->frameTimeDelta; + fiDispatchDesc.reset = callbackDesc->reset; + fiDispatchDesc.cameraNear = prepareDesc->cameraNear; + fiDispatchDesc.cameraFar = prepareDesc->cameraFar; + fiDispatchDesc.viewSpaceToMetersFactor = prepareDesc->viewSpaceToMetersFactor; + fiDispatchDesc.cameraFovAngleVertical = prepareDesc->cameraFovAngleVertical; + fiDispatchDesc.interpolationRect.left = callbackDesc->interpolationRect.left; + fiDispatchDesc.interpolationRect.top 
= callbackDesc->interpolationRect.top; + fiDispatchDesc.interpolationRect.width = callbackDesc->interpolationRect.width; + fiDispatchDesc.interpolationRect.height = callbackDesc->interpolationRect.height; + fiDispatchDesc.frameID = callbackDesc->frameID; + + // use the same surfaces that were specified in the upscale (or interpolation prepare) + fiDispatchDesc.dilatedDepth = contextPrivate->dilatedDepth; + fiDispatchDesc.dilatedMotionVectors = contextPrivate->dilatedMotionVectors; + fiDispatchDesc.reconstructedPrevDepth = contextPrivate->reconstructedPrevNearestDepth; + + if (contextPrivate->frameGenerationFlags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_TEAR_LINES) + { + fiDispatchDesc.flags |= FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES; + } + + if (contextPrivate->frameGenerationFlags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW) + { + fiDispatchDesc.flags |= FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW; + } + + fiDispatchDesc.backBufferTransferFunction = callbackDesc->backBufferTransferFunction; + fiDispatchDesc.minMaxLuminance[0] = callbackDesc->minMaxLuminance[0]; + fiDispatchDesc.minMaxLuminance[1] = callbackDesc->minMaxLuminance[1]; + + errorCode |= ffxFrameInterpolationDispatch(&contextPrivate->fiContext, &fiDispatchDesc); + } + + return errorCode; +} + +FfxErrorCode ffxFsr3ContextDispatchUpscale(FfxFsr3Context* context, const FfxFsr3DispatchUpscaleDescription* dispatchParams) +{ + FfxErrorCode ret = FFX_OK; + + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER); + + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + bool interpolationOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0; + FFX_ASSERT_MESSAGE(interpolationOnly == false, "Fsr3 context has not been initialized to support Frame Generation"); + + contextPrivate->deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, dispatchParams->frameTimeDelta / 
1000.0f)); + + FfxUInt32 sharedResourceIndexUpscaling = dispatchParams->frameID % contextPrivate->sharedResourceCount; + + contextPrivate->dilatedDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource( + &contextPrivate->backendInterfaceSharedResources, + contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 + + (sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]); + contextPrivate->dilatedMotionVectors = contextPrivate->backendInterfaceSharedResources.fpGetResource( + &contextPrivate->backendInterfaceSharedResources, + contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 + + (sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]); + contextPrivate->reconstructedPrevNearestDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource( + &contextPrivate->backendInterfaceSharedResources, + contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 + + (sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]); + + // dispatch FSR3 + FfxFsr3UpscalerDispatchDescription fsr3DispatchParams{}; + fsr3DispatchParams.commandList = dispatchParams->commandList; + fsr3DispatchParams.color = dispatchParams->color; + fsr3DispatchParams.depth = dispatchParams->depth; + fsr3DispatchParams.motionVectors = dispatchParams->motionVectors; + fsr3DispatchParams.exposure = dispatchParams->exposure; + fsr3DispatchParams.reactive = dispatchParams->reactive; + fsr3DispatchParams.transparencyAndComposition = dispatchParams->transparencyAndComposition; + fsr3DispatchParams.output = dispatchParams->upscaleOutput; + fsr3DispatchParams.jitterOffset = dispatchParams->jitterOffset; + fsr3DispatchParams.motionVectorScale = dispatchParams->motionVectorScale; + fsr3DispatchParams.renderSize = dispatchParams->renderSize; + fsr3DispatchParams.enableSharpening = dispatchParams->enableSharpening; + 
fsr3DispatchParams.sharpness = dispatchParams->sharpness; + fsr3DispatchParams.frameTimeDelta = dispatchParams->frameTimeDelta; + fsr3DispatchParams.preExposure = dispatchParams->preExposure; + fsr3DispatchParams.reset = dispatchParams->reset; + fsr3DispatchParams.cameraNear = dispatchParams->cameraNear; + fsr3DispatchParams.cameraFar = dispatchParams->cameraFar; + fsr3DispatchParams.cameraFovAngleVertical = dispatchParams->cameraFovAngleVertical; + fsr3DispatchParams.viewSpaceToMetersFactor = dispatchParams->viewSpaceToMetersFactor; + fsr3DispatchParams.dilatedDepth = contextPrivate->dilatedDepth; + fsr3DispatchParams.dilatedMotionVectors = contextPrivate->dilatedMotionVectors; + fsr3DispatchParams.reconstructedPrevNearestDepth = contextPrivate->reconstructedPrevNearestDepth; + + if (dispatchParams->flags & FFX_FSR3_UPSCALER_FLAG_DRAW_DEBUG_VIEW) + { + fsr3DispatchParams.flags |= FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW; + } + + ret = ffxFsr3UpscalerContextDispatch(&contextPrivate->upscalerContext, &fsr3DispatchParams); + + return ret; +} + +FfxErrorCode ffxFsr3ContextDispatchFrameGenerationPrepare(FfxFsr3Context* context, const FfxFsr3DispatchFrameGenerationPrepareDescription* dispatchParams) +{ + FfxErrorCode ret = FFX_OK; + + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0; + FFX_ASSERT_MESSAGE(upscalingOnly == false, "Fsr3 context has not been initialized to support Frame Generation"); + + // if not interpolationOnly there's no need to execute prepare as prepared resources from upscale can be used + bool interpolationOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0; + + FfxUInt32 sharedResourceIndexUpscaling = dispatchParams->frameID % contextPrivate->sharedResourceCount; + + FfxFrameInterpolationPrepareDescription fiPrepareParams = {0}; + fiPrepareParams.commandList = dispatchParams->commandList; 
+ fiPrepareParams.renderSize = dispatchParams->renderSize; + fiPrepareParams.depth = dispatchParams->depth; + fiPrepareParams.motionVectors = dispatchParams->motionVectors; + fiPrepareParams.jitterOffset = dispatchParams->jitterOffset; + fiPrepareParams.motionVectorScale = dispatchParams->motionVectorScale; + fiPrepareParams.frameTimeDelta = dispatchParams->frameTimeDelta; + fiPrepareParams.cameraNear = dispatchParams->cameraNear; + fiPrepareParams.cameraFar = dispatchParams->cameraFar; + fiPrepareParams.viewSpaceToMetersFactor = dispatchParams->viewSpaceToMetersFactor; + fiPrepareParams.cameraFovAngleVertical = dispatchParams->cameraFovAngleVertical; + fiPrepareParams.frameID = dispatchParams->frameID; + + contextPrivate->dilatedDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource( + &contextPrivate->backendInterfaceSharedResources, + contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 + + (sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]); + contextPrivate->dilatedMotionVectors = contextPrivate->backendInterfaceSharedResources.fpGetResource( + &contextPrivate->backendInterfaceSharedResources, + contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 + + (sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]); + contextPrivate->reconstructedPrevNearestDepth = contextPrivate->backendInterfaceSharedResources.fpGetResource( + &contextPrivate->backendInterfaceSharedResources, + contextPrivate->sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 + + (sharedResourceIndexUpscaling * FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT)]); + + fiPrepareParams.dilatedDepth = contextPrivate->dilatedDepth; + fiPrepareParams.dilatedMotionVectors = contextPrivate->dilatedMotionVectors; + fiPrepareParams.reconstructedPrevDepth = contextPrivate->reconstructedPrevNearestDepth; + + if (interpolationOnly) + { + ret = 
ffxFrameInterpolationPrepare(&contextPrivate->fiContext, &fiPrepareParams); + } + + contextPrivate->fgPrepareDescriptions[dispatchParams->frameID & 1] = fiPrepareParams; + + return ret; +} + +FfxErrorCode ffxFsr3ConfigureFrameGeneration(FfxFsr3Context* context, const FfxFrameGenerationConfig* config) +{ + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0; + FFX_ASSERT_MESSAGE(upscalingOnly == false, "Fsr3 context has not been initialized to support Frame Generation"); + + FFX_ASSERT(config); + FFX_ASSERT_MESSAGE(!contextPrivate->frameGenerationEnabled || !config->allowAsyncWorkloads || contextPrivate->asyncWorkloadSupported, + "Illegal to allow async workload when context was created without FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT flag set."); + + FfxFrameGenerationConfig patchedConfig = *config; + + contextPrivate->frameGenerationFlags = patchedConfig.flags; + contextPrivate->HUDLess_color = patchedConfig.HUDLessColor; + + if (patchedConfig.flags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW) + { + patchedConfig.onlyPresentInterpolated = true; + } + + if (patchedConfig.flags & FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_PACING_LINES) + { + patchedConfig.drawDebugPacingLines = true; + } + + // reset shared resource indices + if (contextPrivate->frameGenerationEnabled != patchedConfig.frameGenerationEnabled) + { + contextPrivate->frameGenerationEnabled = patchedConfig.frameGenerationEnabled; + + if (contextPrivate->frameGenerationEnabled) { + FFX_ASSERT(nullptr == s_Context); + s_Context = context; + } + else if (s_Context == context) { + s_Context = nullptr; + } + } + + return contextPrivate->backendInterfaceFrameInterpolation.fpSwapChainConfigureFrameGeneration(&patchedConfig); +} + +FfxErrorCode ffxFsr3ContextDestroy(FfxFsr3Context* context) +{ + FfxFsr3Context_Private* contextPrivate = (FfxFsr3Context_Private*)(context); + + for 
(FfxUInt32 i = 0; i < FFX_FSR3_RESOURCE_IDENTIFIER_COUNT; i++) + { + FFX_VALIDATE(contextPrivate->backendInterfaceSharedResources.fpDestroyResource(&contextPrivate->backendInterfaceSharedResources, contextPrivate->sharedResources[i], contextPrivate->effectContextIdSharedResources)) + } + contextPrivate->backendInterfaceSharedResources.fpDestroyBackendContext(&contextPrivate->backendInterfaceSharedResources, contextPrivate->effectContextIdSharedResources); + + bool upscalingOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_UPSCALING_ONLY) != 0; + bool interpolationOnly = (contextPrivate->description.flags & FFX_FSR3_ENABLE_INTERPOLATION_ONLY) != 0; + + if (!upscalingOnly) + { + FFX_VALIDATE(ffxFrameInterpolationContextDestroy(&contextPrivate->fiContext)); + FFX_VALIDATE(ffxOpticalflowContextDestroy(&contextPrivate->ofContext)); + } + + if (!interpolationOnly) + { + FFX_VALIDATE(ffxFsr3UpscalerContextDestroy(&contextPrivate->upscalerContext)); + } + + if (s_Context == context) { + s_Context = nullptr; + } + + return FFX_OK; +} + +float ffxFsr3GetUpscaleRatioFromQualityMode(FfxFsr3QualityMode qualityMode) +{ + return ffxFsr3UpscalerGetUpscaleRatioFromQualityMode((FfxFsr3UpscalerQualityMode)qualityMode); +} + +FfxErrorCode ffxFsr3GetRenderResolutionFromQualityMode( + uint32_t* renderWidth, uint32_t* renderHeight, uint32_t displayWidth, uint32_t displayHeight, FfxFsr3QualityMode qualityMode) +{ + return ffxFsr3UpscalerGetRenderResolutionFromQualityMode( renderWidth, renderHeight, displayWidth, displayHeight, (FfxFsr3UpscalerQualityMode) qualityMode); +} + +int32_t ffxFsr3GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth) +{ + return ffxFsr3UpscalerGetJitterPhaseCount(renderWidth, displayWidth); +} + +FfxErrorCode ffxFsr3GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount) +{ + return ffxFsr3UpscalerGetJitterOffset(outX, outY, index, phaseCount); +} + +FFX_API bool ffxFsr3ResourceIsNull(FfxResource resource) +{ + return 
ffxFsr3UpscalerResourceIsNull(resource); +} + +FFX_API FfxVersionNumber ffxFsr3GetEffectVersion() +{ + return FFX_SDK_MAKE_VERSION(FFX_FSR3_VERSION_MAJOR, FFX_FSR3_VERSION_MINOR, FFX_FSR3_VERSION_PATCH); +} diff --git a/thirdparty/amd-ffx/ffx_fsr3.h b/thirdparty/amd-ffx/ffx_fsr3.h new file mode 100644 index 000000000000..6b9d5472617e --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr3.h @@ -0,0 +1,540 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// @defgroup FSR3 + +#pragma once + +// Include the interface for the backend of the FSR3 API. +#include "ffx_interface.h" +#include "ffx_fsr3upscaler.h" +#include "ffx_frameinterpolation.h" +#include "ffx_opticalflow.h" + +/// FidelityFX Super Resolution 3 major version. +/// +/// @ingroup FSR3 +#define FFX_FSR3_VERSION_MAJOR (3) + +/// FidelityFX Super Resolution 3 minor version. 
///
/// @ingroup FSR3
#define FFX_FSR3_VERSION_MINOR (1)

/// FidelityFX Super Resolution 3 patch version.
///
/// @ingroup FSR3
#define FFX_FSR3_VERSION_PATCH (4)

/// FidelityFX Super Resolution 3 context count
///
/// Defines the number of internal effect contexts required by FSR3 (+1 for proxy swapchain)
///
/// @ingroup FSR3
#define FFX_FSR3_CONTEXT_COUNT (FFX_FSR3UPSCALER_CONTEXT_COUNT + FFX_OPTICALFLOW_CONTEXT_COUNT + FFX_FRAMEINTERPOLATION_CONTEXT_COUNT + 1)

/// The size of the context specified in 32bit values.
///
/// @ingroup FSR3
#define FFX_FSR3_CONTEXT_SIZE (FFX_FSR3UPSCALER_CONTEXT_SIZE + FFX_OPTICALFLOW_CONTEXT_SIZE + FFX_FRAMEINTERPOLATION_CONTEXT_SIZE + FFX_SDK_DEFAULT_CONTEXT_SIZE)

#if defined(__cplusplus)
extern "C" {
#endif // #if defined(__cplusplus)

///// An enumeration of all the passes which constitute the FSR3 algorithm.
/////
///// FSR3 is implemented as a composite of several compute passes each
///// computing a key part of the final result. Each call to the
///// FfxFsr3ScheduleGpuJobFunc callback function will
///// correspond to a single pass included in FfxFsr3Pass. For a
///// more comprehensive description of each pass, please refer to the FSR3
///// reference documentation.
/////
///// Please note in some cases e.g.: FFX_FSR3_PASS_ACCUMULATE
///// and FFX_FSR3_PASS_ACCUMULATE_SHARPEN either one pass or the
///// other will be used (they are mutually exclusive). The choice of which will
///// depend on the way the FfxFsr3Context is created and the
///// precise contents of FfxFsr3DispatchParameters each time a call
///// is made to ffxFsr3ContextDispatch.
/////
///// @ingroup FSR3
//typedef enum FfxFsr3Pass
//{
//    // no special FSR3 pipelines
//
//    FFX_FSR3_PASS_COUNT  ///< The number of passes performed by FSR3.
//} FfxFsr3Pass;

/// An enumeration of all the quality modes supported by FidelityFX Super
/// Resolution 3 upscaling.
///
/// In order to provide a consistent user experience across multiple
/// applications which implement FSR3, it is strongly recommended that the
/// following preset scaling factors are made available through your
/// application's user interface.
///
/// If your application does not expose the notion of preset scaling factors
/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
/// is immutable) or implementing a more dynamic scaling scheme (such as
/// dynamic resolution scaling), then there is no need to use these presets.
///
/// Please note that FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE is
/// an optional mode which may introduce significant quality degradation in the
/// final image. As such it is recommended that you evaluate the final results
/// of using this scaling mode before deciding if you should include it in your
/// application.
///
/// @ingroup FSR3
typedef enum FfxFsr3QualityMode {

    FFX_FSR3_QUALITY_MODE_QUALITY                       = 1,        ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
    FFX_FSR3_QUALITY_MODE_BALANCED                      = 2,        ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
    FFX_FSR3_QUALITY_MODE_PERFORMANCE                   = 3,        ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
    FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE             = 4         ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x.
} FfxFsr3QualityMode;

/// An enumeration of bit flags used when creating a
/// FfxFsr3Context. See FfxFsr3ContextDescription.
///
/// @ingroup FSR3
typedef enum FfxFsr3InitializationFlagBits {

    FFX_FSR3_ENABLE_HIGH_DYNAMIC_RANGE                  = (1<<0),   ///< A bit indicating if the input color data provided to all inputs is using a high-dynamic range.
    FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS   = (1<<1),   ///< A bit indicating if the motion vectors are rendered at display resolution.
    FFX_FSR3_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION  = (1<<2),   ///< A bit indicating that the motion vectors have the jittering pattern applied to them.
    FFX_FSR3_ENABLE_DEPTH_INVERTED                      = (1<<3),   ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
    FFX_FSR3_ENABLE_DEPTH_INFINITE                      = (1<<4),   ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
    FFX_FSR3_ENABLE_AUTO_EXPOSURE                       = (1<<5),   ///< A bit indicating if automatic exposure should be applied to input color data.
    FFX_FSR3_ENABLE_DYNAMIC_RESOLUTION                  = (1<<6),   ///< A bit indicating that the application uses dynamic resolution scaling.
    FFX_FSR3_ENABLE_TEXTURE1D_USAGE                     = (1<<7),   ///< This value is deprecated, but remains in order to aid upgrading from older versions of FSR3.
    FFX_FSR3_ENABLE_DEBUG_CHECKING                      = (1<<8),   ///< A bit indicating that the runtime should check some API values and report issues.
    FFX_FSR3_ENABLE_UPSCALING_ONLY                      = (1<<9),   ///< A bit indicating that the context will only be used for upscaling.
    FFX_FSR3_ENABLE_HDR_UPSCALE_SDR_FINALOUTPUT         = (1<<10),  ///< A bit indicating if the input color data provided to UPSCALE is using a high-dynamic range, final output is SDR.
    FFX_FSR3_ENABLE_SDR_UPSCALE_HDR_FINALOUTPUT         = (1<<11),  ///< A bit indicating if the input color data provided to UPSCALE is using SDR, final output is high-dynamic range.
    FFX_FSR3_ENABLE_ASYNC_WORKLOAD_SUPPORT              = (1<<12),  ///< A bit indicating that the context supports async workloads; required before a frame generation config may allow async workloads.
    FFX_FSR3_ENABLE_INTERPOLATION_ONLY                  = (1<<13),  ///< A bit indicating that the context will only be used for frame interpolation (no upscaling).
} FfxFsr3InitializationFlagBits;

/// An enumeration of bit flags controlling debug output of frame generation.
/// Each value aliases the corresponding frame interpolation dispatch flag.
typedef enum FfxFsr3FrameGenerationFlags
{
    FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_TEAR_LINES   = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES,  ///< A bit indicating that the debug tear lines will be drawn to the interpolated output.
    FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_VIEW         = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW,        ///< A bit indicating that the interpolated output resource will contain debug views with relevant information.
    FFX_FSR3_FRAME_GENERATION_FLAG_DRAW_DEBUG_PACING_LINES = FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_PACING_LINES ///< A bit indicating that the debug pacing lines will be drawn to the generated output.
} FfxFsr3FrameGenerationFlags;

/// An enumeration of bit flags accepted in FfxFsr3DispatchUpscaleDescription::flags.
typedef enum FfxFsr3UpscalingFlags
{
    FFX_FSR3_UPSCALER_FLAG_DRAW_DEBUG_VIEW = FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW,  ///< A bit indicating that the upscaled output resource will contain debug views with relevant information.
} FfxFsr3UpscalingFlags;

/// A structure encapsulating the parameters required to initialize FidelityFX
/// Super Resolution 3 upscaling.
///
/// @ingroup FSR3
typedef struct FfxFsr3ContextDescription {
    uint32_t                    flags;                               ///< A collection of FfxFsr3InitializationFlagBits.
    FfxDimensions2D             maxRenderSize;                       ///< The maximum size that rendering will be performed at.
    FfxDimensions2D             maxUpscaleSize;                      ///< The size of the presentation resolution targeted by the upscaling process.
    FfxDimensions2D             displaySize;                         ///< The size of the presentation resolution targeted by the frame interpolation process.
    FfxInterface                backendInterfaceSharedResources;     ///< A set of pointers to the backend implementation for FidelityFX SDK
    FfxInterface                backendInterfaceUpscaling;           ///< A set of pointers to the backend implementation for FidelityFX SDK
    FfxInterface                backendInterfaceFrameInterpolation;  ///< A set of pointers to the backend implementation for FidelityFX SDK
    FfxFsr3UpscalerMessage      fpMessage;                           ///< A pointer to a function that can receive messages from the runtime.
    FfxSurfaceFormat            backBufferFormat;                    ///< The format of the swapchain surface

} FfxFsr3ContextDescription;

/// A structure encapsulating the parameters for dispatching the various passes
/// of FidelityFX Super Resolution 3.
///
/// @ingroup FSR3
typedef struct FfxFsr3DispatchUpscaleDescription {

    FfxCommandList              commandList;                        ///< The FfxCommandList to record FSR3 rendering commands into.
    FfxResource                 color;                              ///< A FfxResource containing the color buffer for the current frame (at render resolution).
    FfxResource                 depth;                              ///< A FfxResource containing 32bit depth values for the current frame (at render resolution).
    FfxResource                 motionVectors;                      ///< A FfxResource containing 2-dimensional motion vectors (at render resolution if FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set).
    FfxResource                 exposure;                           ///< An optional FfxResource containing a 1x1 exposure value.
    FfxResource                 reactive;                           ///< An optional FfxResource containing alpha value of reactive objects in the scene.
    FfxResource                 transparencyAndComposition;         ///< An optional FfxResource containing alpha value of special objects in the scene.
    FfxResource                 upscaleOutput;                      ///< A FfxResource containing the output color buffer for the current frame (at presentation resolution).
    FfxFloatCoords2D            jitterOffset;                       ///< The subpixel jitter offset applied to the camera.
    FfxFloatCoords2D            motionVectorScale;                  ///< The scale factor to apply to motion vectors.
    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.
    FfxDimensions2D             upscaleSize;                        ///< The resolution that the upscaler will output.
    bool                        enableSharpening;                   ///< Enable an additional sharpening pass.
    float                       sharpness;                          ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
    float                       frameTimeDelta;                     ///< The time elapsed since the last frame (expressed in milliseconds).
    float                       preExposure;                        ///< The pre exposure value (must be > 0.0f)
    bool                        reset;                              ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
    float                       cameraNear;                         ///< The distance to the near plane of the camera.
    float                       cameraFar;                          ///< The distance to the far plane of the camera. This is only used in case of non infinite depth.
    float                       cameraFovAngleVertical;             ///< The camera angle field of view in the vertical direction (expressed in radians).
    float                       viewSpaceToMetersFactor;            ///< The scale factor to convert view space units to meters
    uint32_t                    flags;                              ///< combination of FfxFsr3UpscalingFlags
    uint64_t                    frameID;                            ///< Frame identifier; the implementation uses it modulo the shared resource count to select the per-frame shared resources.
} FfxFsr3DispatchUpscaleDescription;

/// A structure encapsulating the parameters for the frame generation prepare
/// dispatch (see ffxFsr3ContextDispatchFrameGenerationPrepare).
///
/// NOTE(review): the camera and timing fields below are undocumented upstream;
/// they presumably carry the same meaning and units as the same-named fields
/// of FfxFsr3DispatchUpscaleDescription — confirm.
typedef struct FfxFsr3DispatchFrameGenerationPrepareDescription
{
    FfxCommandList              commandList;                        ///< The FfxCommandList to record FSR3 rendering commands into.
    FfxResource                 depth;                              ///< A FfxResource containing 32bit depth values for the current frame (at render resolution).
    FfxResource                 motionVectors;                      ///< A FfxResource containing 2-dimensional motion vectors (at render resolution if FFX_FSR3_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set).
    FfxFloatCoords2D            jitterOffset;                       ///< The subpixel jitter offset applied to the camera.
    FfxFloatCoords2D            motionVectorScale;                  ///< The scale factor to apply to motion vectors.
    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.

    float                       frameTimeDelta;
    float                       cameraNear;
    float                       cameraFar;
    float                       viewSpaceToMetersFactor;
    float                       cameraFovAngleVertical;

    uint64_t                    frameID;                            ///< Frame identifier; the implementation uses it modulo the shared resource count to select the per-frame shared resources.
} FfxFsr3DispatchFrameGenerationPrepareDescription;

FFX_API FfxErrorCode ffxFsr3DispatchFrameGeneration(const FfxFrameGenerationDispatchDescription* desc);

/// A structure encapsulating the parameters for automatic generation of a reactive mask
///
/// @ingroup FSR3
typedef struct FfxFsr3GenerateReactiveDescription {

    FfxCommandList              commandList;                        ///< The FfxCommandList to record FSR3 rendering commands into.
    FfxResource                 colorOpaqueOnly;                    ///< A FfxResource containing the opaque only color buffer for the current frame (at render resolution).
    FfxResource                 colorPreUpscale;                    ///< A FfxResource containing the opaque+translucent color buffer for the current frame (at render resolution).
    FfxResource                 outReactive;                        ///< A FfxResource containing the surface to generate the reactive mask into.
    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.
    float                       scale;                              ///< A value to scale the output
    float                       cutoffThreshold;                    ///< A threshold value to generate a binary reactive mask
    float                       binaryValue;                        ///< NOTE(review): undocumented upstream; presumably the value written for texels past cutoffThreshold — confirm.
    uint32_t                    flags;                              ///< Flags to determine how to generate the reactive mask
} FfxFsr3GenerateReactiveDescription;

/// A structure encapsulating the FidelityFX Super Resolution 3 context.
///
/// This sets up an object which contains all persistent internal data and
/// resources that are required by FSR3.
///
/// The FfxFsr3Context object should have a lifetime matching
/// your use of FSR3. Before destroying the FSR3 context care should be taken
/// to ensure the GPU is not accessing the resources created or used by FSR3.
/// It is therefore recommended that the GPU is idle before destroying the
/// FSR3 context.
///
/// @ingroup FSR3
typedef struct FfxFsr3Context
{
    uint32_t data[FFX_FSR3_CONTEXT_SIZE];  ///< An opaque set of uint32_t which contain the data for the context.
} FfxFsr3Context;

/// Create a FidelityFX Super Resolution 3 context from the parameters
/// programmed to the FfxFsr3ContextDescription structure.
///
/// The context structure is the main object used to interact with the FSR3
/// API, and is responsible for the management of the internal resources used
/// by the FSR3 algorithm. When this API is called, multiple calls will be
/// made via the pointers contained in the callbacks structure.
/// These callbacks will attempt to retrieve the device capabilities, and
/// create the internal resources, and pipelines required by FSR3's
/// frame-to-frame function. Depending on the precise configuration used when
/// creating the FfxFsr3Context a different set of resources and
/// pipelines might be requested via the callback functions.
+/// +/// The flags included in the flags field of +/// FfxFsr3Context how match the configuration of your +/// application as well as the intended use of FSR3. It is important that these +/// flags are set correctly (as well as a correct programmed +/// FfxFsr3DispatchDescription) to ensure correct operation. It is +/// recommended to consult the overview documentation for further details on +/// how FSR3 should be integerated into an application. +/// +/// When the FfxFsr3Context is created, you should use the +/// ffxFsr3ContextDispatch function each frame where FSR3 +/// upscaling should be applied. See the documentation of +/// ffxFsr3ContextDispatch for more details. +/// +/// The FfxFsr3Context should be destroyed when use of it is +/// completed, typically when an application is unloaded or FSR3 upscaling is +/// disabled by a user. To destroy the FSR3 context you should call +/// ffxFsr3ContextDestroy. +/// +/// @param [out] context A pointer to a FfxFsr3Context structure to populate. +/// @param [in] contextDescription A pointer to a FfxFsr3ContextDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or contextDescription was NULL. +/// @retval +/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr3ContextDescription.callbacks was not fully specified. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup FSR3 +FFX_API FfxErrorCode ffxFsr3ContextCreate(FfxFsr3Context* context, FfxFsr3ContextDescription* contextDescription); + +FFX_API FfxErrorCode ffxFsr3ContextGetGpuMemoryUsage(FfxFsr3Context* pContext, + FfxEffectMemoryUsage* pUpscalerUsage, + FfxEffectMemoryUsage* pOpticalFlowUsage, + FfxEffectMemoryUsage* pFrameGenerationUsage); + +/// Dispatch the various passes that constitute FidelityFX Super Resolution 3 Upscaling. 
+/// +/// FSR3 is a composite effect, meaning that it is compromised of multiple +/// constituent passes (implemented as one or more clears, copies and compute +/// dispatches). The ffxFsr3ContextDispatchUpscale function is the +/// function which (via the use of the functions contained in the +/// callbacks field of the FfxFsr3Context +/// structure) utlimately generates the sequence of graphics API calls required +/// each frame. +/// +/// As with the creation of the FfxFsr3Context correctly +/// programming the dispatchParams is key to ensuring +/// the correct operation of FSR3. It is particularly important to ensure that +/// camera jitter is correctly applied to your application's projection matrix +/// (or camera origin for raytraced applications). FSR3 provides the +/// ffxFsr3GetJitterPhaseCount and +/// ffxFsr3GetJitterOffset entry points to help applications +/// correctly compute the camera jitter. Whatever jitter pattern is used by the +/// application it should be correctly programmed to the +/// jitterOffset field of the dispatchParams +/// structure. For more guidance on camera jitter please consult the +/// documentation for ffxFsr3GetJitterOffset as well as the +/// accompanying overview documentation for FSR3. +/// +/// @param [in] context A pointer to a FfxFsr3Context structure. +/// @param [in] dispatchParams A pointer to a FfxFsr3DispatchUpscaleDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or dispatchParams was NULL. +/// @retval +/// FFX_ERROR_OUT_OF_RANGE The operation failed because dispatchParams.renderSize was larger than the maximum render resolution. +/// @retval +/// FFX_ERROR_NULL_DEVICE The operation failed because the device inside the context was NULL. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. 
+/// +/// @ingroup FSR3 +FFX_API FfxErrorCode ffxFsr3ContextDispatchUpscale(FfxFsr3Context* context, const FfxFsr3DispatchUpscaleDescription* dispatchParams); +FFX_API FfxErrorCode ffxFsr3ContextDispatchFrameGenerationPrepare(FfxFsr3Context* context, const FfxFsr3DispatchFrameGenerationPrepareDescription* dispatchParams); + +FFX_API FfxErrorCode ffxFsr3SkipPresent(FfxFsr3Context* context); + +/// A helper function generate a Reactive mask from an opaque only texure and one containing translucent objects. +/// +/// @param [in] context A pointer to a FfxFsr3Context structure. +/// @param [in] params A pointer to a FfxFsr3GenerateReactiveDescription structure +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// +/// @ingroup FSR3 +FFX_API FfxErrorCode ffxFsr3ContextGenerateReactiveMask(FfxFsr3Context* context, const FfxFsr3GenerateReactiveDescription* params); + +FFX_API FfxErrorCode ffxFsr3ConfigureFrameGeneration(FfxFsr3Context* context, const FfxFrameGenerationConfig* config); + +/// Destroy the FidelityFX Super Resolution context. +/// +/// @param [out] context A pointer to a FfxFsr3Context structure to destroy. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context was NULL. +/// +/// @ingroup FSR3 +FFX_API FfxErrorCode ffxFsr3ContextDestroy(FfxFsr3Context* context); + +/// Get the upscale ratio from the quality mode. +/// +/// The following table enumerates the mapping of the quality modes to +/// per-dimension scaling ratios. +/// +/// Quality preset | Scale factor +/// ----------------------------------------------------- | ------------- +/// FFX_FSR3_QUALITY_MODE_NATIVEAA | 1.0x +/// FFX_FSR3_QUALITY_MODE_QUALITY | 1.5x +/// FFX_FSR3_QUALITY_MODE_BALANCED | 1.7x +/// FFX_FSR3_QUALITY_MODE_PERFORMANCE | 2.0x +/// FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x +/// +/// Passing an invalid qualityMode will return 0.0f. 
+/// +/// @param [in] qualityMode The quality mode preset. +/// +/// @returns +/// The per-dimension upscaling ratio for +/// qualityMode according to the table above. +/// +/// @ingroup FSR3 +FFX_API float ffxFsr3GetUpscaleRatioFromQualityMode(FfxFsr3QualityMode qualityMode); + +/// A helper function to calculate the rendering resolution from a target +/// resolution and desired quality level. +/// +/// This function applies the scaling factor returned by +/// ffxFsr3GetUpscaleRatioFromQualityMode to each dimension. +/// +/// @param [out] renderWidth A pointer to a uint32_t which will hold the calculated render resolution width. +/// @param [out] renderHeight A pointer to a uint32_t which will hold the calculated render resolution height. +/// @param [in] displayWidth The target display resolution width. +/// @param [in] displayHeight The target display resolution height. +/// @param [in] qualityMode The desired quality mode for FSR3 upscaling. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER Either renderWidth or renderHeight was NULL. +/// @retval +/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified. +/// +/// @ingroup FSR3 +FFX_API FfxErrorCode ffxFsr3GetRenderResolutionFromQualityMode( + uint32_t* renderWidth, + uint32_t* renderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + FfxFsr3QualityMode qualityMode); + +/// A helper function to calculate the jitter phase count from display +/// resolution. +/// +/// For more detailed information about the application of camera jitter to +/// your application's rendering please refer to the +/// ffxFsr3GetJitterOffset function. +/// +/// The table below shows the jitter phase count which this function +/// would return for each of the quality presets. 
+/// +/// Quality preset | Scale factor | Phase count +/// ----------------------------------------------------- | ------------- | --------------- +/// FFX_FSR3_QUALITY_MODE_QUALITY | 1.5x | 18 +/// FFX_FSR3_QUALITY_MODE_BALANCED | 1.7x | 23 +/// FFX_FSR3_QUALITY_MODE_PERFORMANCE | 2.0x | 32 +/// FFX_FSR3_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x | 72 +/// Custom | [1..n]x | ceil(8*n^2) +/// +/// @param [in] renderWidth The render resolution width. +/// @param [in] displayWidth The display resolution width. +/// +/// @returns +/// The jitter phase count for the scaling factor between renderWidth and displayWidth. +/// +/// @ingroup FSR3 +FFX_API int32_t ffxFsr3GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth); + +/// A helper function to calculate the subpixel jitter offset. +/// +/// FSR3 relies on the application to apply sub-pixel jittering while rendering. +/// This is typically included in the projection matrix of the camera. To make +/// the application of camera jitter simple, the FSR3 API provides a small set +/// of utility function which computes the sub-pixel jitter offset for a +/// particular frame within a sequence of separate jitter offsets. To begin, the +/// index within the jitter phase must be computed. To calculate the +/// sequence's length, you can call the ffxFsr3GetJitterPhaseCount +/// function. The index should be a value which is incremented each frame modulo +/// the length of the sequence computed by ffxFsr3GetJitterPhaseCount. +/// The index within the jitter phase is passed to +/// ffxFsr3GetJitterOffset via the index parameter. +/// +/// This function uses a Halton(2,3) sequence to compute the jitter offset. +/// The ultimate index used for the sequence is index % +/// phaseCount. 
+/// +/// It is important to understand that the values returned from the +/// ffxFsr3GetJitterOffset function are in unit pixel space, and +/// in order to composite this correctly into a projection matrix we must +/// convert them into projection offsets. This is done as per the pseudo code +/// listing which is shown below. +/// +/// const int32_t jitterPhaseCount = ffxFsr3GetJitterPhaseCount(renderWidth, displayWidth); +/// +/// float jitterX = 0; +/// float jitterY = 0; +/// ffxFsr3GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount); +/// +/// const float projectionJitterX = 2.0f * jitterX / (float)renderWidth; +/// const float projectionJitterY = -2.0f * jitterY / (float)renderHeight; +/// const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(projectionJitterX, projectionJitterY, 0)); +/// const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix; +/// +/// Jitter should be applied to all rendering. This includes opaque, alpha +/// transparent, and raytraced objects. For rasterized objects, the sub-pixel +/// jittering values calculated by the ffxFsr3GetJitterOffset +/// function can be applied to the camera projection matrix which is ultimately +/// used to perform transformations during vertex shading. For raytraced +/// rendering, the sub-pixel jitter should be applied to the ray's origin, +/// often the camera's position. +/// +/// Whether you elect to use the ffxFsr3GetJitterOffset function +/// or your own sequence generator, you must program the +/// jitterOffset field of the +/// FfxFsr3DispatchUpscaleDescription structure in order to inform FSR3 +/// of the jitter offset that has been applied in order to render each frame. +/// +/// If not using the recommended ffxFsr3GetJitterOffset function, +/// care should be taken that your jitter sequence never generates a null vector; +/// that is, a value of 0 in both the X and Y dimensions. 
+/// +/// @param [out] outX A pointer to a float which will contain the subpixel jitter offset for the x dimension. +/// @param [out] outY A pointer to a float which will contain the subpixel jitter offset for the y dimension. +/// @param [in] index The index within the jitter sequence. +/// @param [in] phaseCount The length of jitter phase. See ffxFsr3GetJitterPhaseCount. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER Either outX or outY was NULL. +/// @retval +/// FFX_ERROR_INVALID_ARGUMENT Argument phaseCount must be greater than 0. +/// +/// @ingroup FSR3 +FFX_API FfxErrorCode ffxFsr3GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount); + +/// A helper function to check if a resource is +/// FFX_FSR3_RESOURCE_IDENTIFIER_NULL. +/// +/// @param [in] resource A FfxResource. +/// +/// @returns +/// true The resource was not FFX_FSR3_RESOURCE_IDENTIFIER_NULL. +/// @returns +/// false The resource was FFX_FSR3_RESOURCE_IDENTIFIER_NULL. +/// +/// @ingroup FSR3 +FFX_API bool ffxFsr3ResourceIsNull(FfxResource resource); + +/// Queries the effect version number. +/// +/// @returns +/// The SDK version the effect was built with. +/// +/// @ingroup FSR3 +FFX_API FfxVersionNumber ffxFsr3GetEffectVersion(); + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-ffx/ffx_fsr3_private.h b/thirdparty/amd-ffx/ffx_fsr3_private.h new file mode 100644 index 000000000000..a772daa2106c --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr3_private.h @@ -0,0 +1,62 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once +#include "gpu/fsr3/ffx_fsr3_resources.h" +#include "ffx_fsr3upscaler.h" +#include "ffx_frameinterpolation.h" +#include "ffx_opticalflow.h" +#include "ffx_fsr3.h" + +// max queued frames for descriptor management +#define FSR3_MAX_QUEUED_FRAMES 2 + +// FfxFsr3Context_Private +// The private implementation of the FSR3 context. 
+// Actually this is only a container for Upscaler+Frameinterpolation+OpticalFlow +typedef struct FfxFsr3Context_Private { + FfxFsr3ContextDescription description; + FfxInterface backendInterfaceSharedResources; + FfxInterface backendInterfaceUpscaling; + FfxInterface backendInterfaceFrameInterpolation; + FfxFsr3UpscalerContext upscalerContext; + FfxOpticalflowContext ofContext; + FfxFrameInterpolationContext fiContext; + FfxResourceInternal sharedResources[FFX_FSR3_RESOURCE_IDENTIFIER_COUNT]; + FfxUInt32 effectContextIdSharedResources; + FfxUInt32 effectContextIdFrameGeneration; + float deltaTime; + bool upscalingOnly; + bool interpolationOnly; + bool asyncWorkloadSupported; + FfxUInt32 sharedResourceCount; + FfxDimensions2D renderSize; ///< The dimensions used to render game content, dilatedDepth, dilatedMotionVectors are expected to be of this size. + + FfxResource HUDLess_color; + FfxResource dilatedDepth; + FfxResource dilatedMotionVectors; + FfxResource reconstructedPrevNearestDepth; + + bool frameGenerationEnabled; + int32_t frameGenerationFlags; + FfxFrameInterpolationPrepareDescription fgPrepareDescriptions[FSR3_MAX_QUEUED_FRAMES]; +} FfxFsr3Context_Private; diff --git a/thirdparty/amd-ffx/ffx_fsr3upscaler.cpp b/thirdparty/amd-ffx/ffx_fsr3upscaler.cpp new file mode 100644 index 000000000000..1e16eaa772ab --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr3upscaler.cpp @@ -0,0 +1,1517 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include // for max used inside SPD CPU code. +#include // for fabs, abs, sinf, sqrt, etc. 
+#include // for memset +#include // for FLT_EPSILON +#include "ffx_fsr3upscaler.h" + +#define FFX_CPU + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wsign-compare" +#endif + +#include "gpu/ffx_core.h" +#include "gpu/fsr1/ffx_fsr1.h" +#include "gpu/spd/ffx_spd.h" +#include "gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h" +#include "gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h" +#include "gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "ffx_object_management.h" + +// max queued frames for descriptor management +static const uint32_t FSR3UPSCALER_MAX_QUEUED_FRAMES = 16; + +#include "ffx_fsr3upscaler_private.h" + +// lists to map shader resource bindpoint name to resource identifier +typedef struct ResourceBinding +{ + uint32_t index; + wchar_t name[64]; +}ResourceBinding; + +static const ResourceBinding srvTextureBindingTable[] = +{ + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR, L"r_input_color_jittered"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY, L"r_input_opaque_only"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS, L"r_input_motion_vectors"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH, L"r_input_depth" }, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE, L"r_input_exposure"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO, L"r_frame_info"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK, L"r_reactive_mask"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK, L"r_transparency_and_composition_mask"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"r_reconstructed_previous_nearest_depth"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"r_dilated_motion_vectors"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"r_dilated_depth"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"r_internal_upscaled_color"}, + 
{FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION, L"r_accumulation"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY, L"r_luma_history" }, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT, L"r_rcas_input"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"r_lanczos_lut"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS, L"r_spd_mips"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"r_dilated_reactive_masks"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS, L"r_new_locks"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH, L"r_farthest_depth"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1, L"r_farthest_depth_mip1"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE, L"r_shading_change"}, + + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA, L"r_current_luma"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA, L"r_previous_luma"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY, L"r_luma_instability"}, +}; + +static const ResourceBinding uavTextureBindingTable[] = +{ + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"rw_reconstructed_previous_nearest_depth"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, L"rw_dilated_motion_vectors"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"rw_dilated_depth"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, L"rw_internal_upscaled_color"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION, L"rw_accumulation"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY, L"rw_luma_history"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT, L"rw_upscaled_output"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"rw_dilated_reactive_masks"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO, L"rw_frame_info"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"rw_spd_global_atomic"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS, 
L"rw_new_locks"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"rw_output_autoreactive"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE, L"rw_shading_change"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH, L"rw_farthest_depth"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1, L"rw_farthest_depth_mip1"}, + + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA, L"rw_current_luma"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY, L"rw_luma_instability"}, + + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0, L"rw_spd_mip0"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_1, L"rw_spd_mip1"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_2, L"rw_spd_mip2"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_3, L"rw_spd_mip3"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_4, L"rw_spd_mip4"}, + {FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5, L"rw_spd_mip5"}, + + +}; + +static const ResourceBinding constantBufferBindingTable[] = +{ + {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER, L"cbFSR3Upscaler"}, + {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbSPD"}, + {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS, L"cbRCAS"}, + {FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE, L"cbGenerateReactive"}, +}; + +typedef struct Fsr3UpscalerRcasConstants { + + uint32_t rcasConfig[4]; +} FfxRcasConstants; + +typedef struct Fsr3UpscalerSpdConstants { + + uint32_t mips; + uint32_t numworkGroups; + uint32_t workGroupOffset[2]; + uint32_t renderSize[2]; +} Fsr3UpscalerSpdConstants; + +typedef struct Fsr3UpscalerGenerateReactiveConstants +{ + float scale; + float threshold; + float binaryValue; + uint32_t flags; + +} Fsr3UpscalerGenerateReactiveConstants; + +typedef struct Fsr3UpscalerGenerateReactiveConstants2 +{ + float autoTcThreshold; + float autoTcScale; + float autoReactiveScale; + float autoReactiveMax; + +} Fsr3UpscalerGenerateReactiveConstants2; + +typedef union 
Fsr3UpscalerSecondaryUnion { + + Fsr3UpscalerRcasConstants rcas; + Fsr3UpscalerSpdConstants spd; + Fsr3UpscalerGenerateReactiveConstants2 autogenReactive; +} Fsr3UpscalerSecondaryUnion; + +// Lanczos-2 kernel: sinc(value) * sinc(value / 2), returning 1 when |value| is below FFX_EPSILON to avoid 0/0. fabsf() keeps the magnitude test in floating point; an unqualified abs() can bind to the int overload and truncate. +static float lanczos2(float value) +{ + return fabsf(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value)); +} + +// Calculate halton number for index and base (digits of index in the given base mirrored around the radix point); returns 0.0f when index <= 0. +static float halton(int32_t index, int32_t base) +{ + float f = 1.0f, result = 0.0f; + + for (int32_t currentIndex = index; currentIndex > 0;) { + + f /= (float)base; + result = result + f * (float)(currentIndex % base); + currentIndex /= base; // integer division is an exact floor for currentIndex > 0 and base > 0, with no float rounding for large indices + } + + return result; +} + +static void fsr3upscalerDebugCheckDispatch(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription* params) +{ + if (params->commandList == nullptr) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"commandList is null"); + } + + if (params->color.resource == nullptr) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"color resource is null"); + } + + if (params->depth.resource == nullptr) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"depth resource is null"); + } + + if (params->motionVectors.resource == nullptr) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"motionVectors resource is null"); + } + + if (params->exposure.resource != nullptr) + { + if ((context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE) == FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present"); + } + } + + if (params->output.resource == nullptr) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"output resource is null"); + } + + if (fabs(params->jitterOffset.x) > 1.0f || fabs(params->jitterOffset.y) > 1.0f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside 
of expected range [-1.0, 1.0]"); + } + + if ((params->motionVectorScale.x > (float)context->contextDescription.maxRenderSize.width) || + (params->motionVectorScale.y > (float)context->contextDescription.maxRenderSize.height)) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize"); + } + if ((params->motionVectorScale.x == 0.0f) || + (params->motionVectorScale.y == 0.0f)) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value"); + } + + if ((params->renderSize.width > context->contextDescription.maxRenderSize.width) || + (params->renderSize.height > context->contextDescription.maxRenderSize.height)) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize"); + } + if ((params->renderSize.width == 0) || + (params->renderSize.height == 0)) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension"); + } + + if (params->sharpness < 0.0f || params->sharpness > 1.0f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]"); + } + + if (params->frameTimeDelta < 1.0f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)"); + } + + if (params->preExposure == 0.0f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid"); + } + + bool infiniteDepth = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE; + bool inverseDepth = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED; + + if (inverseDepth) + { + if (params->cameraNear < params->cameraFar) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, + L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than 
cameraFar"); + } + if (infiniteDepth) + { + if (params->cameraNear != FLT_MAX) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, + L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE and FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX"); + } + } + if (params->cameraFar < 0.075f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, + L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting"); + } + } + else + { + if (params->cameraNear > params->cameraFar) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, + L"cameraNear is greater than cameraFar in non-inverted-depth context"); + } + if (infiniteDepth) + { + if (params->cameraFar != FLT_MAX) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, + L"FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE present, yet cameraFar != FLT_MAX"); + } + } + if (params->cameraNear < 0.075f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_WARNING, + L"cameraNear value is very low which may result in depth separation artefacting"); + } + } + + if (params->cameraFovAngleVertical <= 0.0f) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is 0.0f - this value should be > 0.0f"); + } + if (params->cameraFovAngleVertical > FFX_PI) + { + FFX_PRINT_MESSAGE(FFX_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI"); + } +} + +static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline) +{ + for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(srvTextureBindingTable); ++mapIndex) + { + if (0 == wcscmp(srvTextureBindingTable[mapIndex].name, inoutPipeline->srvTextureBindings[srvIndex].name)) + break; + } + if (mapIndex == _countof(srvTextureBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = srvTextureBindingTable[mapIndex].index; + } + + for (uint32_t uavIndex 
= 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(uavTextureBindingTable); ++mapIndex) + { + if (0 == wcscmp(uavTextureBindingTable[mapIndex].name, inoutPipeline->uavTextureBindings[uavIndex].name)) + break; + } + if (mapIndex == _countof(uavTextureBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = uavTextureBindingTable[mapIndex].index; + } + + for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex) + { + int32_t mapIndex = 0; + for (mapIndex = 0; mapIndex < _countof(constantBufferBindingTable); ++mapIndex) + { + if (0 == wcscmp(constantBufferBindingTable[mapIndex].name, inoutPipeline->constantBufferBindings[cbIndex].name)) + break; + } + if (mapIndex == _countof(constantBufferBindingTable)) + return FFX_ERROR_INVALID_ARGUMENT; + + inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = constantBufferBindingTable[mapIndex].index; + } + + return FFX_OK; +} + +static uint32_t getPipelinePermutationFlags(uint32_t contextFlags, FfxFsr3UpscalerPass passId, bool fp16, bool force64, bool useLut) +{ + // work out what permutation to load. + uint32_t flags = 0; + flags |= (contextFlags & FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE) ? FSR3UPSCALER_SHADER_PERMUTATION_HDR_COLOR_INPUT : 0; + flags |= (contextFlags & FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? 0 : FSR3UPSCALER_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS; + flags |= (contextFlags & FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) ? FSR3UPSCALER_SHADER_PERMUTATION_JITTER_MOTION_VECTORS : 0; + flags |= (contextFlags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) ? FSR3UPSCALER_SHADER_PERMUTATION_DEPTH_INVERTED : 0; + flags |= (passId == FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN) ? FSR3UPSCALER_SHADER_PERMUTATION_ENABLE_SHARPENING : 0; + flags |= (useLut) ? 
FSR3UPSCALER_SHADER_PERMUTATION_USE_LANCZOS_TYPE : 0; + flags |= (force64) ? FSR3UPSCALER_SHADER_PERMUTATION_FORCE_WAVE64 : 0; +#if defined(_GAMING_XBOX) + /** On Xbox we enable 16-bit math, and use 32-bit within the shader only where it's necessary. */ + flags |= (fp16) ? FSR3UPSCALER_SHADER_PERMUTATION_ALLOW_FP16 : 0; +#else + flags |= (fp16 && (passId != FFX_FSR3UPSCALER_PASS_RCAS)) ? FSR3UPSCALER_SHADER_PERMUTATION_ALLOW_FP16 : 0; +#endif // defined(_GAMING_XBOX) + return flags; +} + +static FfxErrorCode createPipelineStates(FfxFsr3UpscalerContext_Private* context) +{ + FFX_ASSERT(context); + + FfxPipelineDescription pipelineDescription = {}; + pipelineDescription.contextFlags = context->contextDescription.flags; + pipelineDescription.stage = FFX_BIND_COMPUTE_SHADER_STAGE; + + // Samplers + pipelineDescription.samplerCount = 2; + FfxSamplerDescription samplerDescs[2] = { { FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE }, + { FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE} }; + pipelineDescription.samplers = samplerDescs; + + // Root constants + pipelineDescription.rootConstantBufferCount = 2; + FfxRootConstantDescription rootConstantDescs[2] = { {sizeof(Fsr3UpscalerConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE }, + { sizeof(Fsr3UpscalerSecondaryUnion) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE } }; + pipelineDescription.rootConstants = rootConstantDescs; + + // Query device capabilities + FfxDeviceCapabilities capabilities; + context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &capabilities); + + // Setup a few options used to determine permutation flags + bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6; + bool supportedFP16 = capabilities.fp16Supported; + bool 
canForceWave64 = false; + bool useLut = false; + + const uint32_t waveLaneCountMin = capabilities.waveLaneCountMin; + const uint32_t waveLaneCountMax = capabilities.waveLaneCountMax; + if (waveLaneCountMin == 32 && waveLaneCountMax == 64) + { + useLut = true; + canForceWave64 = haveShaderModel66; + } + else + { + canForceWave64 = false; + } + + // Work out what permutation to load. + uint32_t contextFlags = context->contextDescription.flags; + + // Set up pipeline descriptor (basically RootSignature and binding) + wcscpy_s(pipelineDescription.name, L"FSR3-LUMA-PYRAMID"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineLumaPyramid)); + wcscpy_s(pipelineDescription.name, L"FSR3-RCAS"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_RCAS, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_RCAS, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineRCAS)); + wcscpy_s(pipelineDescription.name, L"FSR3-GEN_REACTIVE"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineGenerateReactive)); + + pipelineDescription.rootConstantBufferCount = 1; + + wcscpy_s(pipelineDescription.name, L"FSR3-PREPARE-INPUTS"); + 
FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelinePrepareInputs)); + + wcscpy_s(pipelineDescription.name, L"FSR3-PREPARE-REACTIVITY"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelinePrepareReactivity)); + + wcscpy_s(pipelineDescription.name, L"FSR3-SHADING-CHANGE"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineShadingChange)); + + wcscpy_s(pipelineDescription.name, L"FSR3-ACCUMULATE"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_ACCUMULATE, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_ACCUMULATE, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineAccumulate)); + + wcscpy_s(pipelineDescription.name, L"FSR3-ACCUM_SHARP"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, 
FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineAccumulateSharpen)); + + wcscpy_s(pipelineDescription.name, L"FSR3-SHADING-CHANGE-PYRAMID"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineShadingChangePyramid)); + + wcscpy_s(pipelineDescription.name, L"FSR3-LUMA-INSTABILITY"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineLumaInstability)); + + wcscpy_s(pipelineDescription.name, L"FSR3-DEBUG-VIEW"); + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreatePipeline(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, FFX_FSR3UPSCALER_PASS_DEBUG_VIEW, + getPipelinePermutationFlags(contextFlags, FFX_FSR3UPSCALER_PASS_DEBUG_VIEW, supportedFP16, canForceWave64, useLut), + &pipelineDescription, context->effectContextId, &context->pipelineDebugView)); + + // for each pipeline: re-route/fix-up IDs based on names + FFX_VALIDATE(patchResourceBindings(&context->pipelinePrepareInputs)); + FFX_VALIDATE(patchResourceBindings(&context->pipelinePrepareReactivity)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineShadingChange)); + 
FFX_VALIDATE(patchResourceBindings(&context->pipelineAccumulate)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineLumaPyramid)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineAccumulateSharpen)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineRCAS)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineGenerateReactive)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineTcrAutogenerate)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineShadingChangePyramid)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineLumaInstability)); + FFX_VALIDATE(patchResourceBindings(&context->pipelineDebugView)); + + return FFX_OK; +} + +static FfxErrorCode generateReactiveMaskInternal(FfxFsr3UpscalerContext_Private* contextPrivate, const FfxFsr3UpscalerDispatchDescription* params); + +static FfxErrorCode fsr3upscalerCreate(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerContextDescription* contextDescription) +{ + FFX_ASSERT(context); + FFX_ASSERT(contextDescription); + + // Setup the data for implementation. + memset(context, 0, sizeof(FfxFsr3UpscalerContext_Private)); + context->device = contextDescription->backendInterface.device; + + memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr3UpscalerContextDescription)); + + // Check version info - make sure we are linked with the right backend version + FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface); + FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION); + + // Create the context. + FfxErrorCode errorCode = context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_FSR3UPSCALER, nullptr, &context->effectContextId); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + // call out for device caps. 
+ errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + // set defaults + context->firstExecution = true; + context->resourceFrameIndex = 0; + + context->constants.maxUpscaleSize[0] = contextDescription->maxUpscaleSize.width; + context->constants.maxUpscaleSize[1] = contextDescription->maxUpscaleSize.height; + context->constants.velocityFactor = 1.0f; + context->constants.reactivenessScale = 1.0f; + context->constants.shadingChangeScale = 1.0f; + context->constants.accumulationAddedPerFrame = 1.0f/3.0f; + context->constants.minDisocclusionAccumulation = -1.0f/3.0f; + + // generate the data for the LUT. + const uint32_t lanczos2LutWidth = 128; + int16_t lanczos2Weights[lanczos2LutWidth] = { }; + + for (uint32_t currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; currentLanczosWidthIndex++) { + + const float x = 2.0f * currentLanczosWidthIndex / float(lanczos2LutWidth - 1); + const float y = lanczos2(x); + lanczos2Weights[currentLanczosWidthIndex] = int16_t(roundf(y * 32767.0f)); + } + + uint8_t defaultReactiveMaskData = 0U; + uint32_t atomicInitData = 0U; + float defaultExposure[] = { 0.0f, 0.0f }; + + const FfxDimensions2D maxRenderSizeDiv2 = { contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2 }; + + // declare internal resources needed + const FfxInternalResourceDescription internalSurfaceDesc[] = { + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1, L"FSR3UPSCALER_Accumulation1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2, 
L"FSR3UPSCALER_Accumulation2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1, L"FSR3UPSCALER_Luma1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2, L"FSR3UPSCALER_Luma2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1, L"FSR3UPSCALER_IntermediateFp16x1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE, L"FSR3UPSCALER_ShadingChange", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R8_UNORM, maxRenderSizeDiv2.width, maxRenderSizeDiv2.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR3UPSCALER_NewLocks", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), + 
FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxUpscaleSize.width, contextDescription->maxUpscaleSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR3UPSCALER_InternalUpscaled1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxUpscaleSize.width, contextDescription->maxUpscaleSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR3UPSCALER_InternalUpscaled2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxUpscaleSize.width, contextDescription->maxUpscaleSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS, L"FSR3UPSCALER_SpdMips", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_FLOAT, maxRenderSizeDiv2.width, maxRenderSizeDiv2.height, 0, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1, L"FSR3UPSCALER_FarthestDepthMip1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16_FLOAT, maxRenderSizeDiv2.width, maxRenderSizeDiv2.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR3UPSCALER_LumaHistory1", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, 
contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR3UPSCALER_LumaHistory2", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR3UPSCALER_SpdAtomicCounter", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV), + FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitValue(sizeof(atomicInitData), 0) }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR3UPSCALER_DilatedReactiveMasks", FFX_RESOURCE_TYPE_TEXTURE2D, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET), + FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"FSR3UPSCALER_LanczosLutData", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R16_SNORM, lanczos2LutWidth, 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, sizeof(lanczos2Weights), lanczos2Weights} }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, L"FSR3UPSCALER_DefaultReactivityMask", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitValue(sizeof(defaultReactiveMaskData), defaultReactiveMaskData) }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR3UPSCALER_DefaultExposure", 
FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_READ_ONLY, + FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FfxResourceInitData::FfxResourceInitBuffer(sizeof(defaultExposure), defaultExposure) }, + + { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO, L"FSR3UPSCALER_FrameInfo", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + }; + + // clear the SRV resources to NULL. + memset(context->srvResources, 0, sizeof(context->srvResources)); + + for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) { + + const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex]; + const FfxResourceType resourceType = internalSurfaceDesc[currentSurfaceIndex].type; + const FfxResourceDescription resourceDescription = {resourceType, + currentSurfaceDescription->format, + currentSurfaceDescription->width, + currentSurfaceDescription->height, + 1, + currentSurfaceDescription->mipCount, + currentSurfaceDescription->flags, + currentSurfaceDescription->usage}; + const FfxResourceStates initialState = (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) ? 
FFX_RESOURCE_STATE_COMPUTE_READ : FFX_RESOURCE_STATE_UNORDERED_ACCESS; + const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->name, currentSurfaceDescription->id, currentSurfaceDescription->initData }; + + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource(&context->contextDescription.backendInterface, &createResourceDescription, context->effectContextId, &context->srvResources[currentSurfaceDescription->id])); + } + + // copy resources to uavResrouces list + memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources)); + + // avoid compiling pipelines on first render + { + errorCode = createPipelineStates(context); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + } + + return FFX_OK; +} + +static FfxErrorCode fsr3upscalerRelease(FfxFsr3UpscalerContext_Private* context) +{ + FFX_ASSERT(context); + + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelinePrepareInputs, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelinePrepareReactivity, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineShadingChange, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulate, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineAccumulateSharpen, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineRCAS, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineLumaPyramid, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGenerateReactive, 
context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineTcrAutogenerate, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineShadingChangePyramid, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineLumaInstability, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineDebugView, context->effectContextId); + + // Unregister external resources + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + + // Unregister references + 
context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY] = { FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL }; + + // Release the copy resources for those that had init data + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY], context->effectContextId); + ffxSafeReleaseCopyResource(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE], context->effectContextId); + + // release internal resources + for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) { + + 
ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->srvResources[currentResourceIndex], context->effectContextId); + } + + // Destroy the context + context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId); + + return FFX_OK; +} + +static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription* params) +{ + const bool bInverted = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED; + const bool bInfinite = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE; + + // make sure it has no impact if near and far plane values are swapped in dispatch params + // the flags "inverted" and "infinite" will decide what transform to use + float fMin = FFX_MINIMUM(params->cameraNear, params->cameraFar); + float fMax = FFX_MAXIMUM(params->cameraNear, params->cameraFar); + + if (bInverted) { + float tmp = fMin; + fMin = fMax; + fMax = tmp; + } + + // a 0 0 0 x + // 0 b 0 0 y + // 0 0 c d z + // 0 0 e 0 1 + + const float fQ = fMax / (fMin - fMax); + const float d = -1.0f; // for clarity + + const float matrix_elem_c[2][2] = { + fQ, // non reversed, non infinite + -1.0f - FLT_EPSILON, // non reversed, infinite + fQ, // reversed, non infinite + 0.0f + FLT_EPSILON // reversed, infinite + }; + + const float matrix_elem_e[2][2] = { + fQ * fMin, // non reversed, non infinite + -fMin - FLT_EPSILON, // non reversed, infinite + fQ * fMin, // reversed, non infinite + fMax, // reversed, infinite + }; + + context->constants.deviceToViewDepth[0] = d * matrix_elem_c[bInverted][bInfinite]; + context->constants.deviceToViewDepth[1] = matrix_elem_e[bInverted][bInfinite]; + + // revert x and y coords + const float aspect = params->renderSize.width / float(params->renderSize.height); + const 
float cotHalfFovY = cosf(0.5f * params->cameraFovAngleVertical) / sinf(0.5f * params->cameraFovAngleVertical); + const float a = cotHalfFovY / aspect; + const float b = cotHalfFovY; + + context->constants.deviceToViewDepth[2] = (1.0f / a); + context->constants.deviceToViewDepth[3] = (1.0f / b); +} + +static void scheduleDispatch(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription*, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY) +{ + FfxComputeJobDescription jobDescriptor = {}; + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { + + const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = context->srvResources[currentResourceId]; + jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif + } + + for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) { + + const uint32_t currentResourceId = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name); +#endif + + if (currentResourceId >= FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 && currentResourceId <= FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5) + { + const FfxResourceInternal currentResource = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS]; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + 
jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = currentResourceId - FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0; + } + else + { + const FfxResourceInternal currentResource = context->uavResources[currentResourceId]; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0; + } + } + + jobDescriptor.dimensions[0] = dispatchX; + jobDescriptor.dimensions[1] = dispatchY; + jobDescriptor.dimensions[2] = 1; + jobDescriptor.pipeline = *pipeline; + + for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { +#ifdef FFX_DEBUG + wcscpy_s( jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name); +#endif + jobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier]; + } + + FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + wcscpy_s(dispatchJob.jobLabel, pipeline->name); + dispatchJob.computeJobDescriptor = jobDescriptor; + + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob); +} + +FFX_API FfxErrorCode ffxFsr3UpscalerGetSharedResourceDescriptions(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerSharedResourceDescriptions* SharedResources) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + SharedResources, + FFX_ERROR_INVALID_POINTER); + + FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + SharedResources->dilatedDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_FLOAT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, 
(FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FSR3UPSCALER_DilatedDepth", FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + SharedResources->dilatedMotionVectors = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R16G16_FLOAT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV | FFX_RESOURCE_USAGE_DCC_RENDERTARGET) }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FSR3UPSCALER_DilatedVelocity", FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + SharedResources->reconstructedPrevNearestDepth = { FFX_HEAP_TYPE_DEFAULT, { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_UINT, contextPrivate->contextDescription.maxRenderSize.width, contextPrivate->contextDescription.maxRenderSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV) }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"FSR3UPSCALER_ReconstructedPrevNearestDepth", FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + + return FFX_OK; +} + +static FfxErrorCode fsr3upscalerDispatch(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerDispatchDescription* params) +{ + + if ((context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING) == FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING) + { + fsr3upscalerDebugCheckDispatch(context, params); + } + + // take a short cut to the command list + FfxCommandList commandList = params->commandList; + + if (context->firstExecution) + { + FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + + const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; 
+ memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); + + wcscpy_s(clearJob.jobLabel, L"Clear Accumulation 1"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Accumulation 2"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + + wcscpy_s(clearJob.jobLabel, L"Clear Temporal Luma 1"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Temporal Luma 2"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + } + + // Prepare per frame descriptor tables + const bool isOddFrame = !!(context->resourceFrameIndex & 1); + const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT : 0; + const uint32_t currentGpuTableBase = 2 * FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex; + const uint32_t accumulationSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1; + const uint32_t accumulationUavResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2; + const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? 
FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1; + const uint32_t upscaledColorUavResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2; + const uint32_t lumaHistorySrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1; + const uint32_t lumaHistoryUavResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2; + const uint32_t currentLumaSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1; + const uint32_t currentLumaUavResourceIndex = currentLumaSrvResourceIndex; + const uint32_t previousLumaSrvResourceIndex = isOddFrame ? FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1 : FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2; + + const bool resetAccumulation = params->reset || context->firstExecution; + context->firstExecution = false; + + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->color, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->depth, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH]); + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->motionVectors, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]); + + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->dilatedMotionVectors,
context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]); + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS]; + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->dilatedDepth, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH]); + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH]; + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->reconstructedPrevNearestDepth, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH]); + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH]; + + // if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends.
+ if (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE) { + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO]; + } else { + if (ffxFsr3UpscalerResourceIsNull(params->exposure)) { + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE]; + } else { + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->exposure, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]); + } + } + + if (ffxFsr3UpscalerResourceIsNull(params->reactive)) { + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY]; + } + else { + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->reactive, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); + } + + if (ffxFsr3UpscalerResourceIsNull(params->transparencyAndComposition)) { + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY]; + } else { + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->transparencyAndComposition, context->effectContextId, &context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]); + } + + context->contextDescription.backendInterface.fpRegisterResource(&context->contextDescription.backendInterface, &params->output, context->effectContextId, &context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]); +
context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION] = context->srvResources[accumulationSrvResourceIndex]; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->srvResources[upscaledColorSrvResourceIndex]; + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION] = context->uavResources[accumulationUavResourceIndex]; + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->uavResources[upscaledColorUavResourceIndex]; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT] = context->uavResources[upscaledColorUavResourceIndex]; + + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA] = context->srvResources[currentLumaSrvResourceIndex]; + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA] = context->uavResources[currentLumaUavResourceIndex]; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA] = context->srvResources[previousLumaSrvResourceIndex]; + + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->uavResources[lumaHistoryUavResourceIndex]; + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->srvResources[lumaHistorySrvResourceIndex]; + + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1]; + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH] = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1]; + + context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY] = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1]; + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY] = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1]; + + // actual resource size 
may differ from render/display resolution (e.g. due to Hw/API restrictions), so query the descriptor for UVs adjustment + const FfxResourceDescription resourceDescInputColor = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR]); + const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.backendInterface.fpGetResourceDescription(&context->contextDescription.backendInterface, context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]); + FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D); + + context->constants.previousFrameJitterOffset[0] = context->constants.jitterOffset[0]; + context->constants.previousFrameJitterOffset[1] = context->constants.jitterOffset[1]; + context->constants.jitterOffset[0] = params->jitterOffset.x; + context->constants.jitterOffset[1] = params->jitterOffset.y; + + context->constants.previousFrameRenderSize[0] = context->constants.renderSize[0]; + context->constants.previousFrameRenderSize[1] = context->constants.renderSize[1]; + context->constants.renderSize[0] = int32_t(params->renderSize.width ? params->renderSize.width : resourceDescInputColor.width); + context->constants.renderSize[1] = int32_t(params->renderSize.height ? params->renderSize.height : resourceDescInputColor.height); + context->constants.maxRenderSize[0] = int32_t(context->contextDescription.maxRenderSize.width); + context->constants.maxRenderSize[1] = int32_t(context->contextDescription.maxRenderSize.height); + + // compute the horizontal FOV for the shader from the vertical one. 
+ const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height; + const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2; + context->constants.tanHalfFOV = tanf(cameraAngleHorizontal * 0.5f); + context->constants.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f; + + // compute params to enable device depth to view space depth computation in shader + setupDeviceDepthToViewSpaceDepthParams(context, params); + + context->constants.previousFrameUpscaleSize[0] = context->constants.upscaleSize[0]; + context->constants.previousFrameUpscaleSize[1] = context->constants.upscaleSize[1]; + + if (params->upscaleSize.height == 0 && params->upscaleSize.width == 0) + { + context->constants.upscaleSize[0] = context->contextDescription.maxUpscaleSize.width; + context->constants.upscaleSize[1] = context->contextDescription.maxUpscaleSize.height; + } + else + { + context->constants.upscaleSize[0] = params->upscaleSize.width; + context->constants.upscaleSize[1] = params->upscaleSize.height; + } + + // To be updated if resource is larger than the actual image size + context->constants.downscaleFactor[0] = float(context->constants.renderSize[0]) / context->constants.upscaleSize[0]; + context->constants.downscaleFactor[1] = float(context->constants.renderSize[1]) / context->constants.upscaleSize[1]; + + // calculate pre-exposure relevant factors + context->constants.deltaPreExposure = 1.0f; + context->previousFramePreExposure = context->preExposure; + context->preExposure = (params->preExposure != 0.0f) ? params->preExposure : 1.0f; + + if (context->previousFramePreExposure > 0.0f) { + context->constants.deltaPreExposure = context->preExposure / context->previousFramePreExposure; + } + + // motion vector data + const int32_t* motionVectorsTargetSize = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) + ? 
context->constants.upscaleSize + : context->constants.renderSize; + + context->constants.motionVectorScale[0] = (params->motionVectorScale.x / motionVectorsTargetSize[0]); + context->constants.motionVectorScale[1] = (params->motionVectorScale.y / motionVectorsTargetSize[1]); + + // compute jitter cancellation + if (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) { + + context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0]; + context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1]; + + context->previousJitterOffset[0] = context->constants.jitterOffset[0]; + context->previousJitterOffset[1] = context->constants.jitterOffset[1]; + } + + // lock data, assuming jitter sequence length computation for now + const int32_t jitterPhaseCount = ffxFsr3UpscalerGetJitterPhaseCount(params->renderSize.width, context->constants.upscaleSize[0]); + + // init on first frame + if (resetAccumulation || context->constants.jitterPhaseCount == 0) { + context->constants.jitterPhaseCount = (float)jitterPhaseCount; + } else { + const int32_t jitterPhaseCountDelta = (int32_t)(jitterPhaseCount - context->constants.jitterPhaseCount); + if (jitterPhaseCountDelta > 0) { + context->constants.jitterPhaseCount++; + } else if (jitterPhaseCountDelta < 0) { + context->constants.jitterPhaseCount--; + } + } + + // convert delta time to seconds and clamp to [0, 1]. 
+ context->constants.deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, params->frameTimeDelta / 1000.0f)); + + if (resetAccumulation) { + context->constants.frameIndex = 0.0f; + } else { + context->constants.frameIndex += 1.0f; + } + + // GODOT BEGINS + memcpy(context->constants.reprojectionMatrix, params->reprojectionMatrix, sizeof(context->constants.reprojectionMatrix)); + // GODOT ENDS + + // reactive mask bias + const int32_t threadGroupWorkRegionDim = 8; + const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchSrcY = (context->constants.renderSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchDstX = (context->constants.upscaleSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchDstY = (context->constants.upscaleSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchShadingChangePassX = (int32_t(context->constants.renderSize[0] * 0.5f) + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchShadingChangePassY = (int32_t(context->constants.renderSize[1] * 0.5f) + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + // Clear reconstructed depth for max depth store. 
+ if (resetAccumulation) { + + FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + wcscpy_s(clearJob.jobLabel, L"Clear Resource"); + + const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; + memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); + clearJob.clearJobDescriptor.target = context->srvResources[accumulationSrvResourceIndex]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + + wcscpy_s(clearJob.jobLabel, L"Clear Scene Luminance"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + + // Auto exposure always used to track luma changes in locking logic + { + const float clearValuesExposure[]{ -1.f, 1.f, 0.f, 0.f }; + memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float)); + wcscpy_s(clearJob.jobLabel, L"Clear Frame Info"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + } + } + + { + FfxGpuJobDescription clearJob = {FFX_GPU_JOB_CLEAR_FLOAT}; + // FSR3: need to clear here since we need the content of this surface for frameinterpolation + // so clearing in the lock pass is not an option + const bool bInverted = (context->contextDescription.flags & FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED) == FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED; + const float clearDepthValue[]{bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 0.f : 1.f, bInverted ? 
0.f : 1.f}; + memcpy(clearJob.clearJobDescriptor.color, clearDepthValue, 4 * sizeof(float)); + wcscpy_s(clearJob.jobLabel, L"Clear Reconstructed Previous Nearest Depth"); + clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + } + + // Suggested by Enduring to resolve issues with running FSR3 on console via the RHI backend in the plugin as this resource won't be cleared to 0 by default. + { + FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + wcscpy_s(clearJob.jobLabel, L"Clear Spd Atomic Count"); + const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; + memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); + clearJob.clearJobDescriptor.target = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + } + + // Auto exposure + uint32_t dispatchThreadGroupCountXY[2]; + uint32_t workGroupOffset[2]; + uint32_t numWorkGroupsAndMips[2]; + uint32_t rectInfo[4] = { 0, 0, params->renderSize.width, params->renderSize.height }; + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo); + + // downsample + Fsr3UpscalerSpdConstants luminancePyramidConstants; + luminancePyramidConstants.numworkGroups = numWorkGroupsAndMips[0]; + luminancePyramidConstants.mips = numWorkGroupsAndMips[1]; + luminancePyramidConstants.workGroupOffset[0] = workGroupOffset[0]; + luminancePyramidConstants.workGroupOffset[1] = workGroupOffset[1]; + luminancePyramidConstants.renderSize[0] = params->renderSize.width; + luminancePyramidConstants.renderSize[1] = params->renderSize.height; + + // compute the constants. 
+ Fsr3UpscalerRcasConstants rcasConsts = {}; + const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f; + FsrRcasCon(rcasConsts.rcasConfig, sharpenessRemapped); + + // initialize constantBuffers data + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &context->constants, sizeof(context->constants), &context->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER]); + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &luminancePyramidConstants, sizeof(luminancePyramidConstants), &context->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD]); + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &rcasConsts, sizeof(rcasConsts), &context->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS]); + + { + FfxResourceInternal aliasableResources[] = { + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1], + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE], + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS], + // SPD_MIPS are an aliasable resource, but need to be cleared to prevent reading pixels that have never been written + //context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS], + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1], + context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS], + }; + for(int i = 0; i<_countof(aliasableResources); ++i) + { + FfxGpuJobDescription discardJob = { FFX_GPU_JOB_DISCARD }; + discardJob.discardJobDescriptor.target = aliasableResources[i]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &discardJob); + } + // SPD counter needs to be cleared + { + FfxGpuJobDescription 
clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + wcscpy_s(clearJob.jobLabel, L"Clear Spd Atomic Count"); + const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; + memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); + clearJob.clearJobDescriptor.target = context->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + } + } + + scheduleDispatch(context, params, &context->pipelinePrepareInputs, dispatchSrcX, dispatchSrcY); + scheduleDispatch(context, params, &context->pipelineLumaPyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]); + scheduleDispatch(context, params, &context->pipelineShadingChangePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]); + scheduleDispatch(context, params, &context->pipelineShadingChange, dispatchShadingChangePassX, dispatchShadingChangePassY); + scheduleDispatch(context, params, &context->pipelinePrepareReactivity, dispatchSrcX, dispatchSrcY); + scheduleDispatch(context, params, &context->pipelineLumaInstability, dispatchSrcX, dispatchSrcY); + + scheduleDispatch(context, params, params->enableSharpening ? 
&context->pipelineAccumulateSharpen : &context->pipelineAccumulate, dispatchDstX, dispatchDstY); + + // RCAS + if (params->enableSharpening) + { + + // dispatch RCAS + const int32_t threadGroupWorkRegionDimRCAS = 16; + const int32_t dispatchX = (context->constants.upscaleSize[0] + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS; + const int32_t dispatchY = (context->constants.upscaleSize[1] + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS; + scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY); + } + + if (params->flags & FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW) { + scheduleDispatch(context, params, &context->pipelineDebugView, dispatchDstX, dispatchDstY); + } + + context->resourceFrameIndex = (context->resourceFrameIndex + 1) % FSR3UPSCALER_MAX_QUEUED_FRAMES; + + // Fsr3UpscalerMaxQueuedFrames must be an even number. + FFX_STATIC_ASSERT((FSR3UPSCALER_MAX_QUEUED_FRAMES & 1) == 0); + + context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId); + + // release dynamic resources + context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId); + + return FFX_OK; +} + +FFX_API FfxErrorCode ffxFsr3UpscalerContextCreate(FfxFsr3UpscalerContext* context, const FfxFsr3UpscalerContextDescription* contextDescription) +{ + // zero context memory + memset(context, 0, sizeof(FfxFsr3UpscalerContext)); + + // check pointers are valid. 
+ FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + contextDescription, + FFX_ERROR_INVALID_POINTER); + + // validate that all callbacks are set for the interface + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + + // if a scratch buffer is declared, then we must have a size + if (contextDescription->backendInterface.scratchBuffer) { + + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + } + + // ensure the context is large enough for the internal context. + FFX_STATIC_ASSERT(sizeof(FfxFsr3UpscalerContext) >= sizeof(FfxFsr3UpscalerContext_Private)); + + // create the context. 
+ FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + const FfxErrorCode errorCode = fsr3upscalerCreate(contextPrivate, contextDescription); + + return errorCode; +} + +FFX_API FfxErrorCode ffxFsr3UpscalerContextGetGpuMemoryUsage(FfxFsr3UpscalerContext* context, FfxEffectMemoryUsage* vramUsage) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER); + FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + + FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); + + FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage( + &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + +FfxErrorCode ffxFsr3UpscalerContextDestroy(FfxFsr3UpscalerContext* context) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + + // destroy the context. + FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + const FfxErrorCode errorCode = fsr3upscalerRelease(contextPrivate); + return errorCode; +} + +FfxErrorCode ffxFsr3UpscalerContextDispatch(FfxFsr3UpscalerContext* context, const FfxFsr3UpscalerDispatchDescription* dispatchParams) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + dispatchParams, + FFX_ERROR_INVALID_POINTER); + + FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + + // validate that renderSize is within the maximum. 
+ FFX_RETURN_ON_ERROR( + dispatchParams->renderSize.width <= contextPrivate->contextDescription.maxRenderSize.width, + FFX_ERROR_OUT_OF_RANGE); + FFX_RETURN_ON_ERROR( + dispatchParams->renderSize.height <= contextPrivate->contextDescription.maxRenderSize.height, + FFX_ERROR_OUT_OF_RANGE); + FFX_RETURN_ON_ERROR( + dispatchParams->upscaleSize.width <= contextPrivate->contextDescription.maxUpscaleSize.width, + FFX_ERROR_OUT_OF_RANGE); + FFX_RETURN_ON_ERROR( + dispatchParams->upscaleSize.height <= contextPrivate->contextDescription.maxUpscaleSize.height, + FFX_ERROR_OUT_OF_RANGE); + FFX_RETURN_ON_ERROR( + contextPrivate->device, + FFX_ERROR_NULL_DEVICE); + + // dispatch the FSR3 passes. + const FfxErrorCode errorCode = fsr3upscalerDispatch(contextPrivate, dispatchParams); + return errorCode; +} + +FFX_API float ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(FfxFsr3UpscalerQualityMode qualityMode) +{ + switch (qualityMode) { + case FFX_FSR3UPSCALER_QUALITY_MODE_NATIVEAA: + return 1.0f; + case FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY: + return 1.5f; + case FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED: + return 1.7f; + case FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE: + return 2.0f; + case FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE: + return 3.0f; + default: + return 0.0f; + } +} + +FFX_API FfxErrorCode ffxFsr3UpscalerGetRenderResolutionFromQualityMode( + uint32_t* renderWidth, + uint32_t* renderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + FfxFsr3UpscalerQualityMode qualityMode) +{ + FFX_RETURN_ON_ERROR( + renderWidth, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + renderHeight, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + FFX_FSR3UPSCALER_QUALITY_MODE_NATIVEAA <= qualityMode && qualityMode <= FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE, + FFX_ERROR_INVALID_ENUM); + + // scale by the predefined ratios in each dimension. 
+ const float ratio = ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(qualityMode); + const uint32_t scaledDisplayWidth = (uint32_t)((float)displayWidth / ratio); + const uint32_t scaledDisplayHeight = (uint32_t)((float)displayHeight / ratio); + *renderWidth = scaledDisplayWidth; + *renderHeight = scaledDisplayHeight; + + return FFX_OK; +} + +int32_t ffxFsr3UpscalerGetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth) +{ + const float basePhaseCount = 8.0f; + const int32_t jitterPhaseCount = int32_t(basePhaseCount * pow((float(displayWidth) / renderWidth), 2.0f)); + return jitterPhaseCount; +} + +FfxErrorCode ffxFsr3UpscalerGetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount) +{ + FFX_RETURN_ON_ERROR( + outX, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + outY, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + phaseCount > 0, + FFX_ERROR_INVALID_ARGUMENT); + + const float x = halton((index % phaseCount) + 1, 2) - 0.5f; + const float y = halton((index % phaseCount) + 1, 3) - 0.5f; + + *outX = x; + *outY = y; + return FFX_OK; +} + +FFX_API bool ffxFsr3UpscalerResourceIsNull(FfxResource resource) +{ + return resource.resource == NULL; +} + +FfxErrorCode ffxFsr3UpscalerContextGenerateReactiveMask(FfxFsr3UpscalerContext* context, const FfxFsr3UpscalerGenerateReactiveDescription* params) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR( + params, + FFX_ERROR_INVALID_POINTER); + // GODOT BEGINS + // Godot doesn't use FFX context to pass command list. + // So we don't need to ensure that the command list is not null. 
+ // FFX_RETURN_ON_ERROR( + // params->commandList, + // FFX_ERROR_INVALID_POINTER); + // GODOT ENDS + + FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + + FFX_RETURN_ON_ERROR( + contextPrivate->device, + FFX_ERROR_NULL_DEVICE); + + // take a short cut to the command list + FfxCommandList commandList = params->commandList; + + FfxPipelineState* pipeline = &contextPrivate->pipelineGenerateReactive; + + const int32_t threadGroupWorkRegionDim = 8; + const int32_t dispatchSrcX = (params->renderSize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; + + FfxComputeJobDescription jobDescriptor = {}; + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorOpaqueOnly, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->colorPreUpscale, contextPrivate->effectContextId, &contextPrivate->srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR]); + contextPrivate->contextDescription.backendInterface.fpRegisterResource(&contextPrivate->contextDescription.backendInterface, ¶ms->outReactive, contextPrivate->effectContextId, &contextPrivate->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE]); + + jobDescriptor.uavTextures[0].resource = contextPrivate->uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE]; + +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[0].name, pipeline->srvTextureBindings[0].name); + wcscpy_s(jobDescriptor.srvTextures[1].name, pipeline->srvTextureBindings[1].name); + wcscpy_s(jobDescriptor.uavTextures[0].name, pipeline->uavTextureBindings[0].name); +#endif + + 
jobDescriptor.dimensions[0] = dispatchSrcX; + jobDescriptor.dimensions[1] = dispatchSrcY; + jobDescriptor.dimensions[2] = 1; + jobDescriptor.pipeline = *pipeline; + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { + + const uint32_t currentResourceId = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId]; + jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif + } + + Fsr3UpscalerGenerateReactiveConstants genReactiveConsts = {}; + genReactiveConsts.scale = params->scale; + genReactiveConsts.threshold = params->cutoffThreshold; + genReactiveConsts.binaryValue = params->binaryValue; + genReactiveConsts.flags = params->flags; + + contextPrivate->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&contextPrivate->contextDescription.backendInterface, &genReactiveConsts, sizeof(genReactiveConsts), &contextPrivate->constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE]); + + for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) + { +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name); +#endif + jobDescriptor.cbs[currentRootConstantIndex] = contextPrivate->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier]; + } + + FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + dispatchJob.computeJobDescriptor = jobDescriptor; + + 
contextPrivate->contextDescription.backendInterface.fpScheduleGpuJob(&contextPrivate->contextDescription.backendInterface, &dispatchJob); + + contextPrivate->contextDescription.backendInterface.fpExecuteGpuJobs(&contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId); + + // release dynamic resources + contextPrivate->contextDescription.backendInterface.fpUnregisterResources(&contextPrivate->contextDescription.backendInterface, commandList, contextPrivate->effectContextId); + + return FFX_OK; +} + +FFX_API FfxVersionNumber ffxFsr3UpscalerGetEffectVersion() +{ + return FFX_SDK_MAKE_VERSION(FFX_FSR3UPSCALER_VERSION_MAJOR, FFX_FSR3UPSCALER_VERSION_MINOR, FFX_FSR3UPSCALER_VERSION_PATCH); +} + +FFX_API FfxErrorCode ffxFsr3UpscalerSetConstant(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerConfigureKey key, void* valuePtr) +{ + FFX_RETURN_ON_ERROR( + context, + FFX_ERROR_INVALID_POINTER); + + FfxFsr3UpscalerContext_Private* contextPrivate = (FfxFsr3UpscalerContext_Private*)(context); + switch (key) + { + case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FVELOCITYFACTOR: + { + float fValue = 1.0f; + if (valuePtr != nullptr) + { + fValue = *(static_cast(valuePtr)); + } + contextPrivate->constants.velocityFactor = ffxSaturate(fValue); + break; + } + case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FREACTIVENESSSCALE: + { + float fValue = 1.0f; + if (valuePtr != nullptr) + { + fValue = *(static_cast(valuePtr)); + } + contextPrivate->constants.reactivenessScale = ffxMax(0.f,fValue); + break; + } + case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FSHADINGCHANGESCALE: + { + float fValue = 1.0f; + if (valuePtr != nullptr) + { + fValue = *(static_cast(valuePtr)); + } + contextPrivate->constants.shadingChangeScale = ffxMax(0.f,fValue); + break; + } + case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FACCUMULATIONADDEDPERFRAME: + { + float fValue = 1.0f/3.0f; + if (valuePtr != nullptr) + { + fValue = *(static_cast(valuePtr)); + } + 
contextPrivate->constants.accumulationAddedPerFrame = ffxSaturate(fValue); + break; + } + case FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FMINDISOCCLUSIONACCUMULATION: + { + float fValue = -1.0f/3.0f; + if (valuePtr != nullptr) + { + fValue = *(static_cast(valuePtr)); + } + contextPrivate->constants.minDisocclusionAccumulation = ffxMin(1.0f, ffxMax(-1.0f, fValue)); + break; + } + default: + return FFX_ERROR_INVALID_ENUM; + } + return FFX_OK; +} + +FFX_API FfxErrorCode ffxFsr3UpscalerSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel) +{ + ffxSetPrintMessageCallback(fpMessage, debugLevel); + return FFX_OK; +} diff --git a/thirdparty/amd-ffx/ffx_fsr3upscaler.h b/thirdparty/amd-ffx/ffx_fsr3upscaler.h new file mode 100644 index 000000000000..e4646895961c --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr3upscaler.h @@ -0,0 +1,597 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +// Include the interface for the backend of the FSR3 API. +#include "ffx_interface.h" + +/// @defgroup ffxFsr3Upscaler FidelityFX FSR3 +/// FidelityFX Super Resolution 3 runtime library +/// +/// @ingroup SDKComponents + +/// FidelityFX Super Resolution 3 major version. +/// +/// @ingroup ffxFsr3Upscaler +#define FFX_FSR3UPSCALER_VERSION_MAJOR (3) + +/// FidelityFX Super Resolution 3 minor version. +/// +/// @ingroup ffxFsr3Upscaler +#define FFX_FSR3UPSCALER_VERSION_MINOR (1) + +/// FidelityFX Super Resolution 3 patch version. +/// +/// @ingroup ffxFsr3Upscaler +#define FFX_FSR3UPSCALER_VERSION_PATCH (4) + +/// FidelityFX Super Resolution 3 context count +/// +/// Defines the number of internal effect contexts required by FSR3 +/// +/// @ingroup ffxFsr3Upscaler +#define FFX_FSR3UPSCALER_CONTEXT_COUNT 1 + +/// The size of the context specified in 32bit values. +/// +/// @ingroup ffxFsr3Upscaler +#define FFX_FSR3UPSCALER_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE) + +#if defined(__cplusplus) +extern "C" { +#endif // #if defined(__cplusplus) + +/// An enumeration of all the passes which constitute the FSR3 algorithm. +/// +/// FSR3 is implemented as a composite of several compute passes each +/// computing a key part of the final result. Each call to the +/// FfxFsr3UpscalerScheduleGpuJobFunc callback function will +/// correspond to a single pass included in FfxFsr3UpscalerPass. For a +/// more comprehensive description of each pass, please refer to the FSR3 +/// reference documentation. 
+/// +/// Please note in some cases e.g.: FFX_FSR3UPSCALER_PASS_ACCUMULATE +/// and FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN either one pass or the +/// other will be used (they are mutually exclusive). The choice of which will +/// depend on the way the FfxFsr3UpscalerContext is created and the +/// precise contents of FfxFsr3UpscalerDispatchDescription each time a call +/// is made to ffxFsr3UpscalerContextDispatch. +/// +/// @ingroup ffxFsr3Upscaler +typedef enum FfxFsr3UpscalerPass +{ + FFX_FSR3UPSCALER_PASS_PREPARE_INPUTS, ///< A pass which prepares game inputs for later passes. + FFX_FSR3UPSCALER_PASS_LUMA_PYRAMID, ///< A pass which generates the luminance mipmap chain for the current frame. + FFX_FSR3UPSCALER_PASS_SHADING_CHANGE_PYRAMID, ///< A pass which generates the shading change detection mipmap chain for the current frame. + FFX_FSR3UPSCALER_PASS_SHADING_CHANGE, ///< A pass which estimates shading changes for the current frame. + FFX_FSR3UPSCALER_PASS_PREPARE_REACTIVITY, ///< A pass which prepares accumulation relevant information. + FFX_FSR3UPSCALER_PASS_LUMA_INSTABILITY, ///< A pass which estimates temporal instability of the luminance changes. + FFX_FSR3UPSCALER_PASS_ACCUMULATE, ///< A pass which performs upscaling. + FFX_FSR3UPSCALER_PASS_ACCUMULATE_SHARPEN, ///< A pass which performs upscaling when sharpening is used. + FFX_FSR3UPSCALER_PASS_RCAS, ///< A pass which performs sharpening. + FFX_FSR3UPSCALER_PASS_DEBUG_VIEW, ///< A pass which draws some internal resources, for debugging purposes. + + FFX_FSR3UPSCALER_PASS_GENERATE_REACTIVE, ///< An optional pass to generate a reactive mask. + FFX_FSR3UPSCALER_PASS_TCR_AUTOGENERATE, ///< DEPRECATED - NO LONGER SUPPORTED + FFX_FSR3UPSCALER_PASS_COUNT ///< The number of passes performed by FSR3. +} FfxFsr3UpscalerPass; + +/// An enumeration of all the quality modes supported by FidelityFX Super +/// Resolution 3 upscaling.
+/// +/// In order to provide a consistent user experience across multiple +/// applications which implement FSR3. It is strongly recommended that the +/// following preset scaling factors are made available through your +/// application's user interface. +/// +/// If your application does not expose the notion of preset scaling factors +/// for upscaling algorithms (perhaps instead implementing a fixed ratio which +/// is immutable) or implementing a more dynamic scaling scheme (such as +/// dynamic resolution scaling), then there is no need to use these presets. +/// +/// Please note that FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE is +/// an optional mode which may introduce significant quality degradation in the +/// final image. As such it is recommended that you evaluate the final results +/// of using this scaling mode before deciding if you should include it in your +/// application. +/// +/// @ingroup ffxFsr3Upscaler +typedef enum FfxFsr3UpscalerQualityMode { + FFX_FSR3UPSCALER_QUALITY_MODE_NATIVEAA = 0, ///< Perform upscaling with a per-dimension upscaling ratio of 1.0x. + FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY = 1, ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x. + FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED = 2, ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x. + FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE = 3, ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x. + FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE = 4 ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x. +} FfxFsr3UpscalerQualityMode; + +/// An enumeration of bit flags used when creating a +/// FfxFsr3UpscalerContext. See FfxFsr3UpscalerContextDescription. +/// +/// @ingroup ffxFsr3Upscaler +typedef enum FfxFsr3UpscalerInitializationFlagBits { + + FFX_FSR3UPSCALER_ENABLE_HIGH_DYNAMIC_RANGE = (1<<0), ///< A bit indicating if the input color data provided is using a high-dynamic range. 
+ FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS = (1<<1), ///< A bit indicating if the motion vectors are rendered at display resolution. + FFX_FSR3UPSCALER_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION = (1<<2), ///< A bit indicating that the motion vectors have the jittering pattern applied to them. + FFX_FSR3UPSCALER_ENABLE_DEPTH_INVERTED = (1<<3), ///< A bit indicating that the input depth buffer data provided is inverted [1..0]. + FFX_FSR3UPSCALER_ENABLE_DEPTH_INFINITE = (1<<4), ///< A bit indicating that the input depth buffer data provided is using an infinite far plane. + FFX_FSR3UPSCALER_ENABLE_AUTO_EXPOSURE = (1<<5), ///< A bit indicating if automatic exposure should be applied to input color data. + FFX_FSR3UPSCALER_ENABLE_DYNAMIC_RESOLUTION = (1<<6), ///< A bit indicating that the application uses dynamic resolution scaling. + FFX_FSR3UPSCALER_ENABLE_TEXTURE1D_USAGE = (1<<7), ///< This value is deprecated, but remains in order to aid upgrading from older versions of FSR3. + FFX_FSR3UPSCALER_ENABLE_DEBUG_CHECKING = (1<<8), ///< A bit indicating that the runtime should check some API values and report issues. +} FfxFsr3UpscalerInitializationFlagBits; + +/// Pass a string message +/// +/// Used for debug messages. +/// +/// @param [in] type The type of message. +/// @param [in] message A string message to pass. +/// +/// +/// @ingroup ffxFsr3Upscaler +typedef void(*FfxFsr3UpscalerMessage)( + FfxMsgType type, + const wchar_t* message); + +/// A structure encapsulating the parameters required to initialize FidelityFX +/// Super Resolution 3 upscaling. +/// +/// @ingroup ffxFsr3Upscaler +typedef struct FfxFsr3UpscalerContextDescription { + + uint32_t flags; ///< A collection of FfxFsr3UpscalerInitializationFlagBits. + FfxDimensions2D maxRenderSize; ///< The maximum size that rendering will be performed at. + FfxDimensions2D maxUpscaleSize; ///< The size of the output resolution targeted by the upscaling process. 
+ FfxFsr3UpscalerMessage fpMessage; ///< A pointer to a function that can receive messages from the runtime. + FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK + +} FfxFsr3UpscalerContextDescription; + +typedef enum FfxFsr3UpscalerDispatchFlags +{ + FFX_FSR3UPSCALER_DISPATCH_DRAW_DEBUG_VIEW = (1 << 0), ///< A bit indicating that the interpolated output resource will contain debug views with relevant information. +} FfxFsr3UpscalerDispatchFlags; + +typedef enum FfxFsr3UpscalerConfigureKey +{ + FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FVELOCITYFACTOR = 0, //Override constant buffer fVelocityFactor. The float value is casted from void * ptr. Value of 0.0f can improve temporal stability of bright pixels. Default value is 1.0f. Value is clamped to [0.0f, 1.0f]. + FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FREACTIVENESSSCALE = 1, //Override constant buffer fReactivenessScale. The float value is casted from void * ptr. Meant for development purpose to test if writing a larger value to reactive mask, reduces ghosting. Default value is 1.0f. Value is clamped to [0.0f, +infinity]. + FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FSHADINGCHANGESCALE =2, //Override fShadingChangeScale. Increasing this scales fsr3.1 computed shading change value at read to have higher reactiveness. Default value is 1.0f. Value is clamped to [0.0f, +infinity]. + FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FACCUMULATIONADDEDPERFRAME = 3, // Override constant buffer fAccumulationAddedPerFrame. Corresponds to amount of accumulation added per frame at pixel coordinate where disocclusion occured or when reactive mask value is > 0.0f. Decreasing this and drawing the ghosting object (IE no mv) to reactive mask with value close to 1.0f can decrease temporal ghosting. Decreasing this value could result in more thin feature pixels flickering. Default value is 0.333. Value is clamped to [0.0f, 1.0f]. 
+ FFX_FSR3UPSCALER_CONFIGURE_UPSCALE_KEY_FMINDISOCCLUSIONACCUMULATION = 4, //Override constant buffer fMinDisocclusionAccumulation. Increasing this value may reduce white pixel temporal flickering around swaying thin objects that are disoccluding one another often. Too high value may increase ghosting. Default value is -0.333. A sufficiently negative value means for pixel coordinate at frame N that is disoccluded, add fAccumulationAddedPerFrame starting at frame N+2. Default value is -0.333. Value is clamped to [-1.0f, 1.0f]. +} FfxFsr3UpscalerConfigureKey; + +/// A structure encapsulating the parameters for dispatching the various passes +/// of FidelityFX Super Resolution 3. +/// +/// @ingroup ffxFsr3Upscaler +typedef struct FfxFsr3UpscalerDispatchDescription { + + FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into. + FfxResource color; ///< A FfxResource containing the color buffer for the current frame (at render resolution). + FfxResource depth; ///< A FfxResource containing 32bit depth values for the current frame (at render resolution). + FfxResource motionVectors; ///< A FfxResource containing 2-dimensional motion vectors (at render resolution if FFX_FSR3UPSCALER_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS is not set). + FfxResource exposure; ///< A optional FfxResource containing a 1x1 exposure value. + FfxResource reactive; ///< A optional FfxResource containing alpha value of reactive objects in the scene. + FfxResource transparencyAndComposition; ///< A optional FfxResource containing alpha value of special objects in the scene. + FfxResource dilatedDepth; ///< A FfxResource allocated as described in FfxFsr3UpscalerSharedResourceDescriptions that is used to emit dilated depth and share with following effects. + FfxResource dilatedMotionVectors; ///< A FfxResource allocated as described in FfxFsr3UpscalerSharedResourceDescriptions that is used to emit dilated motion vectors and share with following effects. 
+ FfxResource reconstructedPrevNearestDepth; ///< A FfxResource allocated as described in FfxFsr3UpscalerSharedResourceDescriptions that is used to emit reconstructed previous nearest depth and share with following effects. + FfxResource output; ///< A FfxResource containing the output color buffer for the current frame (at presentation resolution). + FfxFloatCoords2D jitterOffset; ///< The subpixel jitter offset applied to the camera. + FfxFloatCoords2D motionVectorScale; ///< The scale factor to apply to motion vectors. + FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources. + FfxDimensions2D upscaleSize; ///< The resolution that the upscaler will output. + bool enableSharpening; ///< Enable an additional sharpening pass. + float sharpness; ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness. + float frameTimeDelta; ///< The time elapsed since the last frame (expressed in milliseconds). + float preExposure; ///< The pre exposure value (must be > 0.0f) + bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously. + float cameraNear; ///< The distance to the near plane of the camera. + float cameraFar; ///< The distance to the far plane of the camera. + float cameraFovAngleVertical; ///< The camera angle field of view in the vertical direction (expressed in radians). + float viewSpaceToMetersFactor; ///< The scale factor to convert view space units to meters + uint32_t flags; ///< combination of FfxFsr3UpscalerDispatchFlags + + // GODOT BEGINS + float reprojectionMatrix[16]; + // GODOT ENDS + +} FfxFsr3UpscalerDispatchDescription; + +/// A structure encapsulating the parameters for automatic generation of a reactive mask +/// +/// @ingroup ffxFsr3Upscaler +typedef struct FfxFsr3UpscalerGenerateReactiveDescription { + + FfxCommandList commandList; ///< The FfxCommandList to record FSR3 rendering commands into. 
+ FfxResource colorOpaqueOnly; ///< A FfxResource containing the opaque only color buffer for the current frame (at render resolution). + FfxResource colorPreUpscale; ///< A FfxResource containing the opaque+translucent color buffer for the current frame (at render resolution). + FfxResource outReactive; ///< A FfxResource containing the surface to generate the reactive mask into. + FfxDimensions2D renderSize; ///< The resolution that was used for rendering the input resources. + float scale; ///< A value to scale the output + float cutoffThreshold; ///< A threshold value to generate a binary reactive mask + float binaryValue; ///< A value to set for the binary reactive mask + uint32_t flags; ///< Flags to determine how to generate the reactive mask +} FfxFsr3UpscalerGenerateReactiveDescription; + +/// A structure encapsulating the resource descriptions for shared resources for this effect. +/// +/// @ingroup ffxFsr3Upscaler +typedef struct FfxFsr3UpscalerSharedResourceDescriptions { + + FfxCreateResourceDescription reconstructedPrevNearestDepth; ///< The FfxCreateResourceDescription for allocating the reconstructedPrevNearestDepth shared resource. + FfxCreateResourceDescription dilatedDepth; ///< The FfxCreateResourceDescription for allocating the dilatedDepth shared resource. + FfxCreateResourceDescription dilatedMotionVectors; ///< The FfxCreateResourceDescription for allocating the dilatedMotionVectors shared resource. +} FfxFsr3UpscalerSharedResourceDescriptions; + +/// A structure encapsulating the FidelityFX Super Resolution 3 context. +/// +/// This sets up an object which contains all persistent internal data and +/// resources that are required by FSR3. +/// +/// The FfxFsr3UpscalerContext object should have a lifetime matching +/// your use of FSR3. Before destroying the FSR3 context care should be taken +/// to ensure the GPU is not accessing the resources created or used by FSR3. 
+/// It is therefore recommended that the GPU is idle before destroying the +/// FSR3 context. +/// +/// @ingroup ffxFsr3Upscaler +typedef struct FfxFsr3UpscalerContext +{ + uint32_t data[FFX_FSR3UPSCALER_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context. +} FfxFsr3UpscalerContext; + + +/// Create a FidelityFX Super Resolution 3 context from the parameters +/// programmed to the FfxFsr3UpscalerContextDescription structure. +/// +/// The context structure is the main object used to interact with the FSR3 +/// API, and is responsible for the management of the internal resources used +/// by the FSR3 algorithm. When this API is called, multiple calls will be +/// made via the pointers contained in the callbacks structure. +/// These callbacks will attempt to retrieve the device capabilities, and +/// create the internal resources, and pipelines required by FSR3's +/// frame-to-frame function. Depending on the precise configuration used when +/// creating the FfxFsr3UpscalerContext a different set of resources and +/// pipelines might be requested via the callback functions. +/// +/// The flags included in the flags field of +/// FfxFsr3UpscalerContextDescription should match the configuration of your +/// application as well as the intended use of FSR3. It is important that these +/// flags are set correctly (as well as a correctly programmed +/// FfxFsr3UpscalerDispatchDescription) to ensure correct operation. It is +/// recommended to consult the overview documentation for further details on +/// how FSR3 should be integrated into an application. +/// +/// When the FfxFsr3UpscalerContext is created, you should use the +/// ffxFsr3UpscalerContextDispatch function each frame where FSR3 +/// upscaling should be applied. See the documentation of +/// ffxFsr3UpscalerContextDispatch for more details.
+/// +/// The FfxFsr3UpscalerContext should be destroyed when use of it is +/// completed, typically when an application is unloaded or FSR3 upscaling is +/// disabled by a user. To destroy the FSR3 context you should call +/// ffxFsr3UpscalerContextDestroy. +/// +/// @param [out] pContext A pointer to a FfxFsr3UpscalerContext structure to populate. +/// @param [in] pContextDescription A pointer to a FfxFsr3UpscalerContextDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either context or contextDescription was NULL. +/// @retval +/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxFsr3UpscalerContextDescription.callbacks was not fully specified. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerContextCreate(FfxFsr3UpscalerContext* pContext, const FfxFsr3UpscalerContextDescription* pContextDescription); + +/// Provides the descriptions for shared resources that must be allocated for this effect. +/// +/// @param [in] context A pointer to a FfxFsr3UpscalerContext structure. +/// @param [out] SharedResources A pointer to a FfxFsr3UpscalerSharedResourceDescriptions to populate. +/// +/// @returns +/// FFX_OK The operation completed successfully. +/// @returns +/// Anything else The operation failed. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerGetSharedResourceDescriptions(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerSharedResourceDescriptions* SharedResources); + +/// Get GPU memory usage of the FidelityFX Super Resolution context. +/// +/// @param [in] pContext A pointer to a FfxFsr3UpscalerContext structure. +/// @param [out] pVramUsage A pointer to a FfxEffectMemoryUsage structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. 
+/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pVramUsage was NULL. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerContextGetGpuMemoryUsage(FfxFsr3UpscalerContext* pContext, FfxEffectMemoryUsage* pVramUsage); + +/// Dispatch the various passes that constitute FidelityFX Super Resolution 3. +/// +/// FSR3 is a composite effect, meaning that it is composed of multiple +/// constituent passes (implemented as one or more clears, copies and compute +/// dispatches). The ffxFsr3UpscalerContextDispatch function is the +/// function which (via the use of the functions contained in the +/// callbacks field of the FfxFsr3UpscalerContext +/// structure) ultimately generates the sequence of graphics API calls required +/// each frame. +/// +/// As with the creation of the FfxFsr3UpscalerContext correctly +/// programming the FfxFsr3UpscalerDispatchDescription is key to ensuring +/// the correct operation of FSR3. It is particularly important to ensure that +/// camera jitter is correctly applied to your application's projection matrix +/// (or camera origin for raytraced applications). FSR3 provides the +/// ffxFsr3UpscalerGetJitterPhaseCount and +/// ffxFsr3UpscalerGetJitterOffset entry points to help applications +/// correctly compute the camera jitter. Whatever jitter pattern is used by the +/// application it should be correctly programmed to the +/// jitterOffset field of the dispatchDescription +/// structure. For more guidance on camera jitter please consult the +/// documentation for ffxFsr3UpscalerGetJitterOffset as well as the +/// accompanying overview documentation for FSR3. +/// +/// @param [in] pContext A pointer to a FfxFsr3UpscalerContext structure. +/// @param [in] pDispatchDescription A pointer to a FfxFsr3UpscalerDispatchDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully.
+/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because either pContext or pDispatchDescription was NULL. +/// @retval +/// FFX_ERROR_OUT_OF_RANGE The operation failed because pDispatchDescription->renderSize was larger than the maximum render resolution. +/// @retval +/// FFX_ERROR_NULL_DEVICE The operation failed because the device inside the context was NULL. +/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerContextDispatch(FfxFsr3UpscalerContext* pContext, const FfxFsr3UpscalerDispatchDescription* pDispatchDescription); + +/// A helper function to generate a reactive mask from an opaque-only texture and one containing translucent objects. +/// +/// @param [in] pContext A pointer to a FfxFsr3UpscalerContext structure. +/// @param [in] pParams A pointer to a FfxFsr3UpscalerGenerateReactiveDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerContextGenerateReactiveMask(FfxFsr3UpscalerContext* pContext, const FfxFsr3UpscalerGenerateReactiveDescription* pParams); + +/// Destroy the FidelityFX Super Resolution context. +/// +/// @param [out] pContext A pointer to a FfxFsr3UpscalerContext structure to destroy. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_CODE_NULL_POINTER The operation failed because pContext was NULL. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerContextDestroy(FfxFsr3UpscalerContext* pContext); + +/// Get the upscale ratio from the quality mode. +/// +/// The following table enumerates the mapping of the quality modes to +/// per-dimension scaling ratios.
+/// +/// Quality preset | Scale factor +/// ----------------------------------------------------- | ------------- +/// FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY | 1.5x +/// FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED | 1.7x +/// FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE | 2.0x +/// FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x +/// +/// Passing an invalid qualityMode will return 0.0f. +/// +/// @param [in] qualityMode The quality mode preset. +/// +/// @returns +/// The per-dimension upscaling ratio for +/// qualityMode according to the table above. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API float ffxFsr3UpscalerGetUpscaleRatioFromQualityMode(FfxFsr3UpscalerQualityMode qualityMode); + +/// A helper function to calculate the rendering resolution from a target +/// resolution and desired quality level. +/// +/// This function applies the scaling factor returned by +/// ffxFsr3UpscalerGetUpscaleRatioFromQualityMode to each dimension. +/// +/// @param [out] pRenderWidth A pointer to a uint32_t which will hold the calculated render resolution width. +/// @param [out] pRenderHeight A pointer to a uint32_t which will hold the calculated render resolution height. +/// @param [in] displayWidth The target display resolution width. +/// @param [in] displayHeight The target display resolution height. +/// @param [in] qualityMode The desired quality mode for FSR3 upscaling. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER Either pRenderWidth or pRenderHeight was NULL. +/// @retval +/// FFX_ERROR_INVALID_ENUM An invalid quality mode was specified. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerGetRenderResolutionFromQualityMode( + uint32_t* pRenderWidth, + uint32_t* pRenderHeight, + uint32_t displayWidth, + uint32_t displayHeight, + FfxFsr3UpscalerQualityMode qualityMode); + +/// A helper function to calculate the jitter phase count from display +/// resolution.
+/// +/// For more detailed information about the application of camera jitter to +/// your application's rendering please refer to the +/// ffxFsr3UpscalerGetJitterOffset function. +/// +/// The table below shows the jitter phase count which this function +/// would return for each of the quality presets. +/// +/// Quality preset | Scale factor | Phase count +/// ----------------------------------------------------- | ------------- | --------------- +/// FFX_FSR3UPSCALER_QUALITY_MODE_QUALITY | 1.5x | 18 +/// FFX_FSR3UPSCALER_QUALITY_MODE_BALANCED | 1.7x | 23 +/// FFX_FSR3UPSCALER_QUALITY_MODE_PERFORMANCE | 2.0x | 32 +/// FFX_FSR3UPSCALER_QUALITY_MODE_ULTRA_PERFORMANCE | 3.0x | 72 +/// Custom | [1..n]x | ceil(8*n^2) +/// +/// @param [in] renderWidth The render resolution width. +/// @param [in] displayWidth The display resolution width. +/// +/// @returns +/// The jitter phase count for the scaling factor between renderWidth and displayWidth. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API int32_t ffxFsr3UpscalerGetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth); + +/// A helper function to calculate the subpixel jitter offset. +/// +/// FSR3 relies on the application to apply sub-pixel jittering while rendering. +/// This is typically included in the projection matrix of the camera. To make +/// the application of camera jitter simple, the FSR3 API provides a small set +/// of utility function which computes the sub-pixel jitter offset for a +/// particular frame within a sequence of separate jitter offsets. To begin, the +/// index within the jitter phase must be computed. To calculate the +/// sequence's length, you can call the ffxFsr3UpscalerGetJitterPhaseCount +/// function. The index should be a value which is incremented each frame modulo +/// the length of the sequence computed by ffxFsr3UpscalerGetJitterPhaseCount. +/// The index within the jitter phase is passed to +/// ffxFsr3UpscalerGetJitterOffset via the index parameter. 
+/// +/// This function uses a Halton(2,3) sequence to compute the jitter offset. +/// The ultimate index used for the sequence is index % +/// phaseCount. +/// +/// It is important to understand that the values returned from the +/// ffxFsr3UpscalerGetJitterOffset function are in unit pixel space, and +/// in order to composite this correctly into a projection matrix we must +/// convert them into projection offsets. This is done as per the pseudo code +/// listing which is shown below. +/// +/// const int32_t jitterPhaseCount = ffxFsr3UpscalerGetJitterPhaseCount(renderWidth, displayWidth); +/// +/// float jitterX = 0; +/// float jitterY = 0; +/// ffxFsr3UpscalerGetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount); +/// +/// const float projJitterX = 2.0f * jitterX / (float)renderWidth; +/// const float projJitterY = -2.0f * jitterY / (float)renderHeight; +/// const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(projJitterX, projJitterY, 0)); +/// const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix; +/// +/// Jitter should be applied to all rendering. This includes opaque, alpha +/// transparent, and raytraced objects. For rasterized objects, the sub-pixel +/// jittering values calculated by the ffxFsr3UpscalerGetJitterOffset +/// function can be applied to the camera projection matrix which is ultimately +/// used to perform transformations during vertex shading. For raytraced +/// rendering, the sub-pixel jitter should be applied to the ray's origin, +/// often the camera's position. +/// +/// Whether you elect to use the ffxFsr3UpscalerGetJitterOffset function +/// or your own sequence generator, you must program the +/// jitterOffset field of the +/// FfxFsr3UpscalerDispatchDescription structure in order to inform FSR3 +/// of the jitter offset that has been applied in order to render each frame.
+/// +/// If not using the recommended ffxFsr3UpscalerGetJitterOffset function, +/// care should be taken that your jitter sequence never generates a null vector; +/// that is value of 0 in both the X and Y dimensions. +/// +/// @param [out] pOutX A pointer to a float which will contain the subpixel jitter offset for the x dimension. +/// @param [out] pOutY A pointer to a float which will contain the subpixel jitter offset for the y dimension. +/// @param [in] index The index within the jitter sequence. +/// @param [in] phaseCount The length of jitter phase. See ffxFsr3UpscalerGetJitterPhaseCount. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER Either outX or outY was NULL. +/// @retval +/// FFX_ERROR_INVALID_ARGUMENT Argument phaseCount must be greater than 0. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerGetJitterOffset(float* pOutX, float* pOutY, int32_t index, int32_t phaseCount); + +/// A helper function to check if a resource is +/// FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL. +/// +/// @param [in] resource A FfxResource. +/// +/// @returns +/// true The resource was not FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL. +/// @returns +/// false The resource was FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API bool ffxFsr3UpscalerResourceIsNull(FfxResource resource); + +/// Queries the effect version number. +/// +/// @returns +/// The SDK version the effect was built with. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxVersionNumber ffxFsr3UpscalerGetEffectVersion(); + +/// Override upscaler constant buffer value after upscaler context creation. +/// +/// @param [in] context A pointer to a FfxFsr3UpscalerContext structure. +/// @param [in] key A key from FfxFsr3UpscalerConfigureKey enum +/// @param [in] valuePtr A pointer to value to pass to shader in Constant Buffer. 
See Fsr3UpscalerConstants +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_ENUM An invalid FfxFsr3UpscalerConfigureKey was specified. +/// @retval +/// FFX_ERROR_INVALID_POINTER context was NULL. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerSetConstant(FfxFsr3UpscalerContext* context, FfxFsr3UpscalerConfigureKey key, void* valuePtr); + +/// Set global debug message settings +/// +/// @param [in] fpMessage A ffxMessageCallback +/// @param [in] debugLevel An unsigned integer. Unimplemented. +/// @retval +/// FFX_OK The operation completed successfully. +/// +/// @ingroup ffxFsr3Upscaler +FFX_API FfxErrorCode ffxFsr3UpscalerSetGlobalDebugMessage(ffxMessageCallback fpMessage, uint32_t debugLevel); + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-ffx/ffx_fsr3upscaler_private.h b/thirdparty/amd-ffx/ffx_fsr3upscaler_private.h new file mode 100644 index 000000000000..dc80dcd3ae60 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_fsr3upscaler_private.h @@ -0,0 +1,127 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once +#include "gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h" + +/// An enumeration of all the permutations that can be passed to the FSR3 Upscaler algorithm. +/// +/// FSR3 Upscaler features are organized through a set of pre-defined compile +/// permutation options that need to be specified. Which shader blob +/// is returned for pipeline creation will be determined by what combination +/// of shader permutations are enabled. +/// +/// @ingroup FSR3Upscaler +typedef enum Fs3UpscalerShaderPermutationOptions +{ + FSR3UPSCALER_SHADER_PERMUTATION_USE_LANCZOS_TYPE = (1 << 0), ///< Off means reference, On means LUT + FSR3UPSCALER_SHADER_PERMUTATION_HDR_COLOR_INPUT = (1 << 1), ///< Enables the HDR code path + FSR3UPSCALER_SHADER_PERMUTATION_LOW_RES_MOTION_VECTORS = (1 << 2), ///< Indicates low resolution motion vectors provided + FSR3UPSCALER_SHADER_PERMUTATION_JITTER_MOTION_VECTORS = (1 << 3), ///< Indicates motion vectors were generated with jitter + FSR3UPSCALER_SHADER_PERMUTATION_DEPTH_INVERTED = (1 << 4), ///< Indicates input resources were generated with inverted depth + FSR3UPSCALER_SHADER_PERMUTATION_ENABLE_SHARPENING = (1 << 5), ///< Enables a supplementary sharpening pass + FSR3UPSCALER_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 6), ///< doesn't map to a define, selects different table + FSR3UPSCALER_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 7), ///< Enables fast math computations where possible +} Fs3UpscalerShaderPermutationOptions; + +// Constants for FSR3 Upscaler 
dispatches. Must be kept in sync with cbFSR3Upscaler in ffx_fsr2_callbacks_hlsl.h +typedef struct Fsr3UpscalerConstants { + + int32_t renderSize[2]; + int32_t previousFrameRenderSize[2]; + + int32_t upscaleSize[2]; + int32_t previousFrameUpscaleSize[2]; + + int32_t maxRenderSize[2]; + int32_t maxUpscaleSize[2]; + + float deviceToViewDepth[4]; + + float jitterOffset[2]; + float previousFrameJitterOffset[2]; + + float motionVectorScale[2]; + float downscaleFactor[2]; + + float motionVectorJitterCancellation[2]; + float tanHalfFOV; + float jitterPhaseCount; + + float deltaTime; + float deltaPreExposure; + float viewSpaceToMetersFactor; + float frameIndex; + + float velocityFactor; + float reactivenessScale; + float shadingChangeScale; + float accumulationAddedPerFrame; + float minDisocclusionAccumulation; + + // GODOT BEGINS + float pad[3]; // Pads the 37 preceding 32-bit fields (148 bytes, assuming 4-byte float) to 160 bytes so reprojectionMatrix starts at a multiple of 16 bytes; presumably required by the shader-side constant buffer layout (TODO confirm). + float reprojectionMatrix[16]; + // GODOT ENDS +} Fsr3UpscalerConstants; + +struct FfxFsr3UpscalerContextDescription; +struct FfxDeviceCapabilities; +struct FfxPipelineState; + +// FfxFsr3UpscalerContext_Private +// The private implementation of the FSR3 Upscaler context. +typedef struct FfxFsr3UpscalerContext_Private { + + FfxFsr3UpscalerContextDescription contextDescription; + FfxUInt32 effectContextId; + Fsr3UpscalerConstants constants; + FfxDevice device; + FfxDeviceCapabilities deviceCapabilities; + FfxPipelineState pipelinePrepareInputs; + FfxPipelineState pipelinePrepareReactivity; + FfxPipelineState pipelineShadingChange; + FfxPipelineState pipelineAccumulate; + FfxPipelineState pipelineAccumulateSharpen; + FfxPipelineState pipelineRCAS; + FfxPipelineState pipelineLumaPyramid; + FfxPipelineState pipelineGenerateReactive; + FfxPipelineState pipelineTcrAutogenerate; + FfxPipelineState pipelineShadingChangePyramid; + FfxPipelineState pipelineLumaInstability; + FfxPipelineState pipelineDebugView; + FfxConstantBuffer constantBuffers[FFX_FSR3UPSCALER_CONSTANTBUFFER_COUNT]; + + // 2 arrays of resources, as e.g.
FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV + FfxResourceInternal srvResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT]; + FfxResourceInternal uavResources[FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT]; + + bool firstExecution; + uint32_t resourceFrameIndex; + float previousJitterOffset[2]; + float preExposure; + float previousFramePreExposure; + +} FfxFsr3UpscalerContext_Private; + +// Declare fsr3UpscalerCreate here so that it can be used from fsr3. +FFX_API FfxErrorCode fsr3UpscalerCreate(FfxFsr3UpscalerContext_Private* context, const FfxFsr3UpscalerContextDescription* contextDescription); diff --git a/thirdparty/amd-ffx/ffx_interface.h b/thirdparty/amd-ffx/ffx_interface.h new file mode 100644 index 000000000000..7fe3916296fd --- /dev/null +++ b/thirdparty/amd-ffx/ffx_interface.h @@ -0,0 +1,676 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "ffx_assert.h" +#include "ffx_types.h" +#include "ffx_error.h" +#include "ffx_message.h" + +// GODOT BEGINS +// Fix GCC build +#ifndef _MSC_VER +#include +#include +#define wcscpy_s wcscpy +#ifndef _countof +#define _countof(a) (sizeof(a) / sizeof(*(a))) +#endif +#endif +// GODOT ENDS + +#if defined(__cplusplus) +#define FFX_CPU +extern "C" { +#endif // #if defined(__cplusplus) + +/// @defgroup Backends Backends +/// Core interface declarations and natively supported backends +/// +/// @ingroup ffxSDK + +/// @defgroup FfxInterface FfxInterface +/// FidelityFX SDK function signatures and core defines requiring +/// overrides for backend implementation. +/// +/// @ingroup Backends +FFX_FORWARD_DECLARE(FfxInterface); + +/// FidelityFX SDK major version. +/// +/// @ingroup FfxInterface +#define FFX_SDK_VERSION_MAJOR (1) + +/// FidelityFX SDK minor version. +/// +/// @ingroup FfxInterface +#define FFX_SDK_VERSION_MINOR (1) + +/// FidelityFX SDK patch version. +/// +/// @ingroup FfxInterface +#define FFX_SDK_VERSION_PATCH (4) + +/// Macro to pack a FidelityFX SDK version id together: major is shifted left by 22 bits, minor by 12 bits, and patch occupies the low bits. Each argument is parenthesized so that expression arguments expand with the intended precedence. +/// +/// @ingroup FfxInterface +#define FFX_SDK_MAKE_VERSION( major, minor, patch ) ( ( ( major ) << 22 ) | ( ( minor ) << 12 ) | ( patch ) ) + +/// Stand in type for FfxPass +/// +/// These will be defined for each effect individually (e.g. FfxFsr2Pass). +/// They are used to fetch the proper blob index to build effect shaders +/// +/// @ingroup FfxInterface +typedef uint32_t FfxPass; + +/// Get the SDK version of the backend context. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// +/// @returns +/// The SDK version a backend was built with.
+/// +/// @ingroup FfxInterface +typedef FfxVersionNumber(*FfxGetSDKVersionFunc)( + FfxInterface* backendInterface); + +/// Get effect VRAM usage. +/// +/// Newer effects may require support that legacy versions of the SDK will not be +/// able to provide. A version query is thus required to ensure an effect component +/// will always be paired with a backend which will support all needed functionality. +/// NOTE(review): the paragraph above describes an SDK version query rather than memory usage; it looks misplaced (compare FfxGetSDKVersionFunc) - confirm against upstream. +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// @param [out] outVramUsage The effect memory usage structure to fill out. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxGetEffectGpuMemoryUsageFunc)(FfxInterface* backendInterface, FfxUInt32 effectContextId, FfxEffectMemoryUsage* outVramUsage); + +/// Create and initialize the backend context. +/// +/// The callback function sets up the backend context for rendering. +/// It will create or reference the device and create required internal data structures. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] effect The effect the context is being created for. +/// @param [in] bindlessConfig A pointer to the bindless configuration, if required by the effect. +/// @param [out] effectContextId The context space to be used for the effect in question. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxCreateBackendContextFunc)( + FfxInterface* backendInterface, + FfxEffect effect, + FfxEffectBindlessConfig* bindlessConfig, + FfxUInt32* effectContextId); + +/// Get a list of capabilities of the device.
+/// +/// When creating an FfxEffectContext it is desirable for the FFX +/// core implementation to be aware of certain characteristics of the platform +/// that is being targeted. This is because some optimizations which FFX SDK +/// attempts to perform are more effective on certain classes of hardware than +/// others, or are not supported by older hardware. In order to avoid cases +/// where optimizations actually have the effect of decreasing performance, or +/// reduce the breadth of support provided by FFX SDK, the FFX interface queries the +/// capabilities of the device to make such decisions. +/// +/// For target platforms with fixed hardware support you need not implement +/// this callback function by querying the device, but instead may hardcode +/// what features are available on the platform. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [out] outDeviceCapabilities The device capabilities structure to fill out. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode(*FfxGetDeviceCapabilitiesFunc)( + FfxInterface* backendInterface, + FfxDeviceCapabilities* outDeviceCapabilities); + +/// Destroy the backend context and dereference the device. +/// +/// This function is called when the FfxEffectContext is destroyed. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode(*FfxDestroyBackendContextFunc)( + FfxInterface* backendInterface, + FfxUInt32 effectContextId); + +/// Create a resource. +/// +/// This callback is intended for the backend to create internal resources.
+/// +/// Please note: It is also possible that the creation of resources might +/// itself cause additional resources to be created by simply calling the +/// FfxCreateResourceFunc function pointer again. This is +/// useful when handling the initial creation of resources which must be +/// initialized. The flow in such a case would be an initial call to create the +/// CPU-side resource, another to create the GPU-side resource, and then a call +/// to schedule a copy render job to move the data between the two. Typically +/// this type of function call flow is only seen during the creation of an +/// FfxEffectContext. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] createResourceDescription A pointer to a FfxCreateResourceDescription. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// @param [out] outResource A pointer to a FfxResourceInternal object. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxCreateResourceFunc)( + FfxInterface* backendInterface, + const FfxCreateResourceDescription* createResourceDescription, + FfxUInt32 effectContextId, + FfxResourceInternal* outResource); + +/// Register a resource in the backend for the current frame. +/// +/// Since the FfxInterface and the backends are not aware how many different +/// resources will get passed in over time, it's not safe +/// to register all resources simultaneously in the backend. +/// Also, passed resources may not be valid after the dispatch call. +/// As a result it's safest to register them as FfxResourceInternal +/// and clear them at the end of the dispatch call. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] inResource A pointer to a FfxResource. +/// @param [in] effectContextId The context space to be used for the effect in question.
+/// @param [out] outResource A pointer to a FfxResourceInternal object. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode(*FfxRegisterResourceFunc)( + FfxInterface* backendInterface, + const FfxResource* inResource, + FfxUInt32 effectContextId, + FfxResourceInternal* outResource); + + +/// Get an FfxResource from an FfxResourceInternal resource. +/// +/// At times it is necessary to create an FfxResource representation +/// of an internally created resource in order to register it with a +/// child effect context. This function sets up the FfxResource needed +/// to register. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] resource The FfxResourceInternal for which to set up an FfxResource. +/// +/// @returns +/// An FfxResource built from the internal resource. +/// +/// @ingroup FfxInterface +typedef FfxResource(*FfxGetResourceFunc)( + FfxInterface* backendInterface, + FfxResourceInternal resource); + +/// Unregister all temporary FfxResourceInternal from the backend. +/// +/// Unregister FfxResourceInternal referencing resources passed to +/// a function as a parameter. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] commandList A pointer to a FfxCommandList structure. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode(*FfxUnregisterResourcesFunc)( + FfxInterface* backendInterface, + FfxCommandList commandList, + FfxUInt32 effectContextId); + +/// Register a resource in the static bindless table of the backend.
+/// +/// A static resource will persist in its respective bindless table until it is +/// overwritten by a different resource at the same index. +/// The calling code must take care not to immediately register a new resource at an index +/// that might be in use by an in-flight frame. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] desc A pointer to an FfxStaticResourceDescription. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxRegisterStaticResourceFunc)(FfxInterface* backendInterface, + const FfxStaticResourceDescription* desc, + FfxUInt32 effectContextId); + +/// Retrieve a FfxResourceDescription matching a +/// FfxResourceInternal resource. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] resource The FfxResourceInternal to describe (passed by value). +/// +/// @returns +/// A description of the resource. +/// +/// @ingroup FfxInterface +typedef FfxResourceDescription (*FfxGetResourceDescriptionFunc)( + FfxInterface* backendInterface, + FfxResourceInternal resource); + +/// Destroy a resource +/// +/// This callback is intended for the backend to release an internal resource. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] resource The FfxResourceInternal to destroy (passed by value). +/// @param [in] effectContextId The context space to be used for the effect in question. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed.
+/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxDestroyResourceFunc)( + FfxInterface* backendInterface, + FfxResourceInternal resource, + FfxUInt32 effectContextId); + +/// Map resource memory +/// +/// Maps the memory of the resource to a pointer and returns it. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] resource The FfxResourceInternal to map (passed by value). +/// @param [out] ptr A pointer to the mapped memory. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxMapResourceFunc)(FfxInterface* backendInterface, FfxResourceInternal resource, void** ptr); + +/// Unmap resource memory +/// +/// Unmaps previously mapped memory of a resource. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] resource The FfxResourceInternal to unmap (passed by value). +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxUnmapResourceFunc)(FfxInterface* backendInterface, FfxResourceInternal resource); + +/// Stage constant buffer data +/// +/// This callback is intended for the backend to copy constant buffer data into staging memory. +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] data A pointer to the constant buffer data to stage. +/// @param [in] size The size of the data (presumably in bytes - TODO confirm). +/// @param constantBuffer A pointer to a FfxConstantBuffer; presumably filled out to reference the staged data (TODO confirm direction). +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxStageConstantBufferDataFunc)( + FfxInterface* backendInterface, + void* data, + FfxUInt32 size, + FfxConstantBuffer* constantBuffer); + +/// Create a render pipeline. +/// +/// A rendering pipeline contains the shader as well as resource bindpoints +/// and samplers. +/// +/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] effect,pass The effect and the identifier for the pass (FfxPass values are defined per effect). +/// @param [in] permutationOptions The shader permutation options for the pipeline (presumably a bitwise OR of the effect's permutation flags - TODO confirm). +/// @param [in] pipelineDescription A pointer to a FfxPipelineDescription describing the pipeline to be created. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// @param [out] outPipeline A pointer to a FfxPipelineState structure which should be populated. +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxCreatePipelineFunc)( + FfxInterface* backendInterface, + FfxEffect effect, + FfxPass pass, + uint32_t permutationOptions, + const FfxPipelineDescription* pipelineDescription, + FfxUInt32 effectContextId, + FfxPipelineState* outPipeline); + +typedef FfxErrorCode(*FfxGetPermutationBlobByIndexFunc)(FfxEffect effectId, + FfxPass passId, + FfxBindStage bindStage, + uint32_t permutationOptions, + FfxShaderBlob* outBlob); + +/// Destroy a render pipeline. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [out] pipeline A pointer to a FfxPipelineState structure which should be released. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxDestroyPipelineFunc)( + FfxInterface* backendInterface, + FfxPipelineState* pipeline, + FfxUInt32 effectContextId); + +/// Schedule a render job to be executed on the next call of +/// FfxExecuteGpuJobsFunc. +/// +/// Render jobs can perform one of three different tasks: clear, copy or +/// compute dispatches. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] job A pointer to a FfxGpuJobDescription structure.
+/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxScheduleGpuJobFunc)( + FfxInterface* backendInterface, + const FfxGpuJobDescription* job); + +/// Execute scheduled render jobs on the command list provided. +/// +/// The recording of the graphics API commands should take place in this +/// callback function; the render jobs which were previously enqueued (via +/// callbacks made to FfxScheduleGpuJobFunc) should be +/// processed in the order they were received. Advanced users might choose to +/// reorder the rendering jobs, but should do so with care to respect the +/// resource dependencies. +/// +/// Depending on the precise contents of FfxDispatchDescription a +/// different number of render jobs might have previously been enqueued (for +/// example if sharpening is toggled on and off). +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] commandList A pointer to a FfxCommandList structure. +/// @param [in] effectContextId The context space to be used for the effect in question. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed.
+/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxExecuteGpuJobsFunc)( + FfxInterface* backendInterface, + FfxCommandList commandList, + FfxUInt32 effectContextId); + +typedef enum FfxUiCompositionFlags +{ + FFX_UI_COMPOSITION_FLAG_USE_PREMUL_ALPHA = (1 << 0), ///< A bit indicating that we use premultiplied alpha for UI composition + FFX_UI_COMPOSITION_FLAG_ENABLE_INTERNAL_UI_DOUBLE_BUFFERING = (1 << 1), ///< A bit indicating that the swapchain should double-buffer the UI resource +} FfxUiCompositionFlags; + +typedef FfxErrorCode(*FfxPresentCallbackFunc)(const FfxPresentCallbackDescription* params, void*); +typedef FfxErrorCode(*FfxFrameGenerationDispatchFunc)(const FfxFrameGenerationDispatchDescription* params, void*); +typedef FfxErrorCode(*FfxWaitCallbackFunc)(wchar_t* fenceName, uint64_t fenceValueToWaitFor); + +/// A structure representing the configuration options to pass to FrameInterpolationSwapChain +/// +/// @ingroup FfxInterface +typedef struct FfxFrameGenerationConfig +{ + FfxSwapchain swapChain; ///< The FfxSwapchain to use with frame interpolation + FfxPresentCallbackFunc presentCallback; ///< A UI composition callback to call when finalizing the frame image + void* presentCallbackContext; ///< A pointer to be passed to the UI composition callback + FfxFrameGenerationDispatchFunc frameGenerationCallback; ///< The frame generation callback to use to generate the interpolated frame + void* frameGenerationCallbackContext; ///< A pointer to be passed to the frame generation callback + bool frameGenerationEnabled; ///< Sets the state of frame generation. Set to false to disable frame generation + bool allowAsyncWorkloads; ///< Sets the state of async workloads. Set to true to enable interpolation work on async compute + bool allowAsyncPresent; ///< Sets the state of async presentation (console only).
Set to true to enable present from async command queue + FfxResource HUDLessColor; ///< The hudless back buffer image to use for UI extraction from backbuffer resource + FfxUInt32 flags; ///< Flags; presumably a combination of FfxUiCompositionFlags bits (TODO confirm) + bool onlyPresentInterpolated; ///< Set to true to only present the interpolated frame + FfxRect2D interpolationRect; ///< Set the area in the backbuffer that will be interpolated + uint64_t frameID; ///< A frame identifier used to synchronize resource usage in workloads + bool drawDebugPacingLines; ///< Sets the state of pacing debug lines. Set to true to display debug lines +} FfxFrameGenerationConfig; + +typedef FfxErrorCode (*FfxSwapChainConfigureFrameGenerationFunc)(FfxFrameGenerationConfig const* config); + +/// Allocate AMD FidelityFX Breadcrumbs Library markers buffer. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] blockBytes Size in bytes of the buffer to be allocated. +/// @param [out] blockData Output information about allocated AMD FidelityFX Breadcrumbs Library buffer. Filled only on success of operation. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// Anything else The operation failed. +/// +/// @ingroup FfxInterface +typedef FfxErrorCode (*FfxBreadcrumbsAllocBlockFunc)( + FfxInterface* backendInterface, + uint64_t blockBytes, + FfxBreadcrumbsBlockData* blockData + ); + +/// Deallocate AMD FidelityFX Breadcrumbs Library markers buffer. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [out] blockData Information about buffer to be freed. All resource handles are cleared after this operation. +/// +/// @ingroup FfxInterface +typedef void (*FfxBreadcrumbsFreeBlockFunc)( + FfxInterface* backendInterface, + FfxBreadcrumbsBlockData* blockData + ); + +/// Write marker to AMD FidelityFX Breadcrumbs Library buffer on the command list provided. +/// +/// @param [in] backendInterface A pointer to the backend interface.
+/// @param [in] commandList GPU command list to record marker writing command. +/// @param [in] value Marker value to be written. +/// @param [in] gpuLocation GPU destination address where marker will be written. +/// @param [in] gpuBuffer Destination AMD FidelityFX Breadcrumbs Library buffer. +/// @param [in] isBegin true for writing opening marker and false for ending marker. +/// +/// @ingroup FfxInterface +typedef void (*FfxBreadcrumbsWriteFunc)( + FfxInterface* backendInterface, + FfxCommandList commandList, + uint32_t value, + uint64_t gpuLocation, + void* gpuBuffer, + bool isBegin + ); + +/// Print GPU-specific info to the AMD FidelityFX Breadcrumbs Library status buffer. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] allocs A pointer to the allocation callbacks. +/// @param [in] extendedInfo true to print more verbose device info and false for standard output. +/// @param [out] printBuffer String buffer for writing GPU info. +/// @param [out] printSize Size of string buffer for writing GPU info. +/// +/// @ingroup FfxInterface +typedef void (*FfxBreadcrumbsPrintDeviceInfoFunc)( + FfxInterface* backendInterface, + FfxAllocationCallbacks* allocs, + bool extendedInfo, + char** printBuffer, + size_t* printSize + ); + +/// Register a thread-safe constant buffer allocator to be used by the backend. +/// +/// @param [in] backendInterface A pointer to the backend interface. +/// @param [in] constantAllocator An FfxConstantBufferAllocator callback to be used by the backend. +/// +/// @ingroup FfxInterface +typedef void(*FfxRegisterConstantBufferAllocatorFunc)(FfxInterface* backendInterface, + FfxConstantBufferAllocator constantAllocator); + +/// A structure encapsulating the interface between the core implementation of +/// the FfxInterface and any graphics API that it should ultimately call.
+/// +/// This set of functions serves as an abstraction layer between FfxInterface and the +/// API used to implement it. While the FidelityFX SDK ships with backends for DirectX12 and +/// Vulkan, it is possible to implement your own backend for other platforms +/// which sit on top of your engine's own abstraction layer. For details on the +/// expectations of what each function should do you should refer to the +/// description of the following function pointer types (the list is not exhaustive): +/// - FfxCreateBackendContextFunc +/// - FfxGetDeviceCapabilitiesFunc +/// - FfxDestroyBackendContextFunc +/// - FfxCreateResourceFunc +/// - FfxRegisterResourceFunc +/// - FfxGetResourceFunc +/// - FfxUnregisterResourcesFunc +/// - FfxGetResourceDescriptionFunc +/// - FfxDestroyResourceFunc +/// - FfxCreatePipelineFunc +/// - FfxDestroyPipelineFunc +/// - FfxScheduleGpuJobFunc +/// - FfxExecuteGpuJobsFunc +/// - FfxBreadcrumbsWriteFunc +/// - FfxBreadcrumbsPrintDeviceInfoFunc +/// - FfxRegisterConstantBufferAllocatorFunc +/// +/// Depending on the graphics API that is abstracted by the backend, it may be +/// required that the backend is to some extent stateful. To ensure that +/// applications retain full control to manage the memory used by the FidelityFX SDK, the +/// scratchBuffer and scratchBufferSize fields are +/// provided. A backend should provide a means of specifying how much scratch +/// memory is required for its internal implementation (e.g. via a function +/// or constant value). The application is then responsible for allocating that +/// memory and providing it when setting up the SDK backend. Backends provided +/// with the FidelityFX SDK do not perform dynamic memory allocations, and instead +/// sub-allocate all memory from the scratch buffers provided. +/// +/// The scratchBuffer and scratchBufferSize fields +/// should be populated according to the requirements of each backend. For +/// example, if using the DirectX 12 backend you should call the +/// ffxGetScratchMemorySizeDX12 function.
It is not required +/// that custom backend implementations use a scratch buffer. +/// +/// Any functional addition to this interface mandates a version +/// bump to ensure full functionality across effects and backends. +/// +/// @ingroup FfxInterface +typedef struct FfxInterface { + + // FidelityFX SDK 1.0 callback handles + FfxGetSDKVersionFunc fpGetSDKVersion; ///< A callback function to query the SDK version. + FfxGetEffectGpuMemoryUsageFunc fpGetEffectGpuMemoryUsage; ///< A callback function to query effect GPU memory usage. + FfxCreateBackendContextFunc fpCreateBackendContext; ///< A callback function to create and initialize the backend context. + FfxGetDeviceCapabilitiesFunc fpGetDeviceCapabilities; ///< A callback function to query device capabilities. + FfxDestroyBackendContextFunc fpDestroyBackendContext; ///< A callback function to destroy the backend context. This also dereferences the device. + FfxCreateResourceFunc fpCreateResource; ///< A callback function to create a resource. + FfxRegisterResourceFunc fpRegisterResource; ///< A callback function to register an external resource. + FfxGetResourceFunc fpGetResource; ///< A callback function to convert an internal resource to external resource type. + FfxUnregisterResourcesFunc fpUnregisterResources; ///< A callback function to unregister external resources. + FfxRegisterStaticResourceFunc fpRegisterStaticResource; ///< A callback function to register a static resource. + FfxGetResourceDescriptionFunc fpGetResourceDescription; ///< A callback function to retrieve a resource description. + FfxDestroyResourceFunc fpDestroyResource; ///< A callback function to destroy a resource. + FfxMapResourceFunc fpMapResource; ///< A callback function to map a resource. + FfxUnmapResourceFunc fpUnmapResource; ///< A callback function to unmap a resource. + FfxStageConstantBufferDataFunc fpStageConstantBufferDataFunc; ///< A callback function to copy constant buffer data into staging memory.
+ FfxCreatePipelineFunc fpCreatePipeline; ///< A callback function to create a render or compute pipeline. + FfxDestroyPipelineFunc fpDestroyPipeline; ///< A callback function to destroy a render or compute pipeline. + FfxScheduleGpuJobFunc fpScheduleGpuJob; ///< A callback function to schedule a render job. + FfxExecuteGpuJobsFunc fpExecuteGpuJobs; ///< A callback function to execute all queued render jobs. + + // FidelityFX SDK 1.1 callback handles + FfxBreadcrumbsAllocBlockFunc fpBreadcrumbsAllocBlock; ///< A callback function to allocate a block of memory for the AMD FidelityFX Breadcrumbs Library buffer. + FfxBreadcrumbsFreeBlockFunc fpBreadcrumbsFreeBlock; ///< A callback function to free the AMD FidelityFX Breadcrumbs Library buffer. + FfxBreadcrumbsWriteFunc fpBreadcrumbsWrite; ///< A callback function to write a marker into the AMD FidelityFX Breadcrumbs Library. + FfxBreadcrumbsPrintDeviceInfoFunc fpBreadcrumbsPrintDeviceInfo; ///< A callback function to print active GPU info for the AMD FidelityFX Breadcrumbs Library log. + + FfxGetPermutationBlobByIndexFunc fpGetPermutationBlobByIndex; ///< Presumably a callback function to fetch a shader permutation blob by index (undocumented here - TODO confirm). + FfxSwapChainConfigureFrameGenerationFunc fpSwapChainConfigureFrameGeneration; ///< A callback function to configure swap chain present callback. + + FfxRegisterConstantBufferAllocatorFunc fpRegisterConstantBufferAllocator; ///< A callback function to register a custom thread-safe constant buffer allocator. + + void* scratchBuffer; ///< A preallocated buffer for memory utilized internally by the backend. + size_t scratchBufferSize; ///< Size of the buffer pointed to by scratchBuffer. + FfxDevice device; ///< A backend specific device + +} FfxInterface; + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-ffx/ffx_message.cpp b/thirdparty/amd-ffx/ffx_message.cpp new file mode 100644 index 000000000000..66755ba19bde --- /dev/null +++ b/thirdparty/amd-ffx/ffx_message.cpp @@ -0,0 +1,69 @@ +// This file is part of the FidelityFX SDK.
+// +// Copyright (C) 2025 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "ffx_message.h"
+
+// GODOT BEGINS
+// On non-Windows Platforms this file uses the macro `FFX_UNUSED`, we have to include it here
+#include "ffx_util.h"
+// GODOT ENDS
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h> // required for OutputDebugString()
+#endif // #ifdef _WIN32
+
+// Application-registered message sink; stays null until ffxSetPrintMessageCallback() installs one.
+static ffxMessageCallback s_messageCallback;
+// Debug level passed to ffxSetPrintMessageCallback(). It is recorded here but not read anywhere in this file.
+static uint32_t s_debugLevel;
+
+/// Registers the callback that receives the messages passed to ffxPrintMessage().
+///
+/// @param [in] callback    Callback invoked for every message. Passing a null callback
+///                         restores the built-in fallback of ffxPrintMessage().
+/// @param [in] debugLevel  Value stored alongside the callback; this file does not interpret it.
+void ffxSetPrintMessageCallback(ffxMessageCallback callback, uint32_t debugLevel)
+{
+    s_messageCallback = callback;
+    s_debugLevel = debugLevel;
+}
+
+/// Delivers a message to the registered callback, or to the debugger output when no
+/// callback is registered (Windows only; dropped on other platforms).
+///
+/// @param [in] type     Message type, compared against FFX_MESSAGE_TYPE_ERROR / FFX_MESSAGE_TYPE_WARNING.
+/// @param [in] message  The wide-character message text.
+void ffxPrintMessage(uint32_t type, const wchar_t* message)
+{
+    // A registered callback always takes precedence, on every platform. Previously the
+    // callback was only reachable in Windows builds, so messages were silently dropped
+    // elsewhere even when the application had asked to receive them.
+    if (s_messageCallback) {
+        s_messageCallback(type, message);
+        return;
+    }
+
+#ifdef _WIN32
+    // No callback: format the message and send it to the debugger output.
+    // The buffer is zero-initialized and every message type takes a formatting branch,
+    // so OutputDebugStringW() never reads indeterminate memory.
+    wchar_t buffer[512] = {};
+    if (type == FFX_MESSAGE_TYPE_ERROR) {
+        swprintf_s(buffer, 512, L"FSR_API_DEBUG_ERROR: %ls\n", message);
+    } else if (type == FFX_MESSAGE_TYPE_WARNING) {
+        swprintf_s(buffer, 512, L"FSR_API_DEBUG_WARNING: %ls\n", message);
+    } else {
+        // Message types other than error/warning are forwarded without a severity prefix.
+        swprintf_s(buffer, 512, L"%ls\n", message);
+    }
+    OutputDebugStringW(buffer);
+#else
+    // No fallback output channel is implemented for non-Windows platforms.
+    FFX_UNUSED(type);
+    FFX_UNUSED(message);
+#endif
+}
diff --git a/thirdparty/amd-ffx/ffx_message.h b/thirdparty/amd-ffx/ffx_message.h
new file mode 100644
index 000000000000..442849f4d3b8
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_message.h
@@ -0,0 +1,63 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif // #ifdef __cplusplus
+
+/// @defgroup Messages Messages
+/// Messages used by FidelityFX SDK functions
+///
+/// @ingroup ffxHost
+
+/// Provides the ability to set a callback for print messages.
+///
+/// @param [in] callback    The callback function that will receive print messages.
+/// @param [in] debugLevel  Debug level registered together with the callback.
+///                         NOTE(review): the implementation in ffx_message.cpp only stores
+///                         this value; confirm whether it is meant to filter messages.
+///
+/// @ingroup Messages
+FFX_API void ffxSetPrintMessageCallback(ffxMessageCallback callback, uint32_t debugLevel);
+
+/// Function to print a message.
+///
+/// @param [in] type     See FfxMsgType
+/// @param [in] message  The message to print (wide-character string).
+///
+/// @ingroup Messages
+FFX_API void ffxPrintMessage(uint32_t type, const wchar_t* message);
+
+/// Macro to print a message by calling the application-registered callback,
+/// otherwise to the debugger's TTY.
+///
+/// Expands to a single ffxPrintMessage() call wrapped in do { } while (0), so it
+/// can be used like an ordinary statement.
+///
+/// @ingroup Messages
+#define FFX_PRINT_MESSAGE( type, msg) \
+    do \
+    { \
+        ffxPrintMessage( type, msg); \
+    } while (0)
+#ifdef __cplusplus
+}
+#endif // #ifdef __cplusplus
diff --git a/thirdparty/amd-ffx/ffx_object_management.cpp b/thirdparty/amd-ffx/ffx_object_management.cpp
new file mode 100644
index 000000000000..016cd0ed1b94
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_object_management.cpp
@@ -0,0 +1,48 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_interface.h"
+#include "ffx_object_management.h"
+
+/// Destroys a pipeline through the backend's fpDestroyPipeline callback.
+///
+/// @param [in] backendInterface  Backend whose fpDestroyPipeline callback is invoked. Must not be null.
+/// @param [in] pipeline          Pipeline state to destroy. Must not be null.
+/// @param [in] effectContextId   Effect context id forwarded unchanged to the backend.
+void ffxSafeReleasePipeline(FfxInterface* backendInterface, FfxPipelineState* pipeline, FfxUInt32 effectContextId)
+{
+    // Check the interface pointer itself before dereferencing it for the callback check.
+    FFX_ASSERT(backendInterface);
+    FFX_ASSERT(pipeline);
+    FFX_ASSERT(backendInterface->fpDestroyPipeline);
+
+    backendInterface->fpDestroyPipeline(backendInterface, pipeline, effectContextId);
+}
+
+/// Destroys the resource whose internal index immediately follows that of `resource`.
+///
+/// NOTE(review): presumably the backend keeps a resource's copy in the slot right after
+/// the resource itself - confirm against the backend implementation.
+///
+/// @param [in] backendInterface  Backend whose fpDestroyResource callback is invoked. Must not be null.
+/// @param [in] resource          Resource whose internalIndex + 1 identifies the resource to destroy.
+/// @param [in] effectContextId   Effect context id forwarded unchanged to the backend.
+void ffxSafeReleaseCopyResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId)
+{
+    FFX_ASSERT(backendInterface);
+    FFX_ASSERT(backendInterface->fpDestroyResource);
+
+    // Value-initialize so no member of the handle is passed to the backend indeterminate.
+    FfxResourceInternal copyResource = {};
+    copyResource.internalIndex = resource.internalIndex + 1;
+    backendInterface->fpDestroyResource(backendInterface, copyResource, effectContextId);
+}
+
+/// Destroys a resource through the backend's fpDestroyResource callback.
+///
+/// @param [in] backendInterface  Backend whose fpDestroyResource callback is invoked. Must not be null.
+/// @param [in] resource          Internal resource handle to destroy.
+/// @param [in] effectContextId   Effect context id forwarded unchanged to the backend.
+void ffxSafeReleaseResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId)
+{
+    FFX_ASSERT(backendInterface);
+    FFX_ASSERT(backendInterface->fpDestroyResource);
+
+    backendInterface->fpDestroyResource(backendInterface, resource, effectContextId);
+}
diff --git a/thirdparty/amd-ffx/ffx_object_management.h b/thirdparty/amd-ffx/ffx_object_management.h
new file mode 100644
index 000000000000..b665ffa847d9
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_object_management.h
@@ -0,0 +1,38 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+#include "ffx_interface.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// Destroys `pipeline` by calling the backend's fpDestroyPipeline callback.
+/// Asserts that `pipeline` and the callback are non-null.
+FFX_API void ffxSafeReleasePipeline(FfxInterface* backendInterface, FfxPipelineState* pipeline, FfxUInt32 effectContextId);
+/// Destroys, via the backend's fpDestroyResource callback, the resource whose
+/// internal index is `resource.internalIndex + 1`.
+/// NOTE(review): presumably that adjacent slot holds the copy of `resource` - confirm against the backend.
+FFX_API void ffxSafeReleaseCopyResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId);
+/// Destroys `resource` by calling the backend's fpDestroyResource callback.
+/// Asserts that the callback is non-null.
+FFX_API void ffxSafeReleaseResource(FfxInterface* backendInterface, FfxResourceInternal resource, FfxUInt32 effectContextId);
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-ffx/ffx_opticalflow.cpp b/thirdparty/amd-ffx/ffx_opticalflow.cpp
new file mode 100644
index 000000000000..3f0ed0f02ee1
--- /dev/null
+++ b/thirdparty/amd-ffx/ffx_opticalflow.cpp
@@ -0,0 +1,987 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include <algorithm> // for max used inside SPD CPU code. +#include <cmath> // for fabs, abs, sinf, sqrt, etc. 
+#include <string.h> // for memset
+#include <cfloat> // for FLT_EPSILON
+#include "ffx_opticalflow.h"
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#endif
+
+#define FFX_CPU
+#include "gpu/ffx_core.h"
+#include "gpu/spd/ffx_spd.h"
+#include "gpu/opticalflow/ffx_opticalflow_callbacks_hlsl.h"
+#include "ffx_object_management.h"
+
+#define FFX_OPTICALFLOW_MAX_QUEUED_FRAMES 16
+
+#include "ffx_opticalflow_private.h"
+
+// Associates a shader binding name with the resource identifier that
+// patchResourceBindings() writes into the matching pipeline binding.
+typedef struct Binding
+{
+    uint32_t index;   // Resource identifier assigned to the binding.
+    wchar_t name[64]; // Binding name compared (wcscmp) against the pipeline's reflected names.
+}Binding;
+
+// Name -> identifier table used for the pipeline's srvTextureBindings.
+static const Binding srvBindingNames[] =
+{
+    {FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR, L"r_input_color"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT, L"r_optical_flow_input"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT, L"r_optical_flow_previous_input"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW, L"r_optical_flow"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS, L"r_optical_flow_previous"},
+};
+
+// Name -> identifier table used for the pipeline's uavTextureBindings.
+static const Binding uavBindingNames[] =
+{
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT, L"rw_optical_flow_input"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_1, L"rw_optical_flow_input_level_1"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_2, L"rw_optical_flow_input_level_2"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_3, L"rw_optical_flow_input_level_3"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_4, L"rw_optical_flow_input_level_4"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_5, L"rw_optical_flow_input_level_5"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_6, L"rw_optical_flow_input_level_6"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW, L"rw_optical_flow"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_NEXT_LEVEL, L"rw_optical_flow_next_level"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM, L"rw_optical_flow_scd_histogram"}, // scene change detection histogram
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM, L"rw_optical_flow_scd_previous_histogram"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP, L"rw_optical_flow_scd_temp"},
+    {FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT, L"rw_optical_flow_scd_output"},
+};
+
+// Name -> identifier table used for the pipeline's constantBufferBindings.
+static const Binding cbBindingNames[] =
+{
+    {FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER, L"cbOF"},
+    {FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER_SPD, L"cbOF_SPD"}
+};
+
+// Broad structure of the root signature.
+typedef enum OpticalFlowRootSignatureLayout {
+
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_UAVS,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_SRVS,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_CONSTANTS,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1,
+    OPTICALFLOW_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT
+} OpticalFlowRootSignatureLayout;
+
+// CPU-side layout of the second root constant block; createPipelineStates() sizes that
+// block as sizeof(OpticalFlowSpdConstants) / sizeof(uint32_t). The pad fields round the
+// second row up to four 32-bit values.
+typedef struct OpticalFlowSpdConstants
+{
+    uint32_t mips;
+    uint32_t numworkGroups;
+    uint32_t workGroupOffset[2];
+
+    uint32_t numworkGroupsOpticalFlowInputPyramid;
+    uint32_t pad0_;
+    uint32_t pad1_;
+    uint32_t pad2_;
+
+} OpticalFlowSpdConstants;
+
+// Returns the entry of `table` whose name equals `name`, or nullptr when there is none.
+static const Binding* findBindingByName(const Binding* table, size_t tableCount, const wchar_t* name)
+{
+    for (size_t entry = 0; entry < tableCount; ++entry)
+    {
+        if (0 == wcscmp(table[entry].name, name))
+            return &table[entry];
+    }
+    return nullptr;
+}
+
+// Fills in the resourceIdentifier of every SRV texture, UAV texture and constant buffer
+// binding of `inoutPipeline` by looking the binding's name up in the tables above.
+//
+// Returns FFX_OK when every binding name is known, or FFX_ERROR_INVALID_ARGUMENT (after
+// asserting) at the first name that is missing from its table.
+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
+{
+    FFX_ASSERT(inoutPipeline);
+
+    for (uint32_t srvIndex = 0; srvIndex < inoutPipeline->srvTextureCount; ++srvIndex)
+    {
+        const Binding* match = findBindingByName(srvBindingNames, _countof(srvBindingNames), inoutPipeline->srvTextureBindings[srvIndex].name);
+        FFX_ASSERT(match);
+        if (!match)
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->srvTextureBindings[srvIndex].resourceIdentifier = match->index;
+    }
+
+    for (uint32_t uavIndex = 0; uavIndex < inoutPipeline->uavTextureCount; ++uavIndex)
+    {
+        const Binding* match = findBindingByName(uavBindingNames, _countof(uavBindingNames), inoutPipeline->uavTextureBindings[uavIndex].name);
+        FFX_ASSERT(match);
+        if (!match)
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->uavTextureBindings[uavIndex].resourceIdentifier = match->index;
+    }
+
+    for (uint32_t cbIndex = 0; cbIndex < inoutPipeline->constCount; ++cbIndex)
+    {
+        const Binding* match = findBindingByName(cbBindingNames, _countof(cbBindingNames), inoutPipeline->constantBufferBindings[cbIndex].name);
+        FFX_ASSERT(match);
+        if (!match)
+            return FFX_ERROR_INVALID_ARGUMENT;
+
+        inoutPipeline->constantBufferBindings[cbIndex].resourceIdentifier = match->index;
+    }
+
+    return FFX_OK;
+}
+
+// Builds the shader permutation flags for a pass. Only `fp16` and `force64` influence
+// the result; the unnamed parameters (context flags, pass, LUT usage) are ignored.
+static uint32_t getPipelinePermutationFlags(uint32_t, FfxPass, bool fp16, bool force64, bool)
+{
+    uint32_t flags = 0;
+    flags |= (force64) ? OPTICALFLOW_SHADER_PERMUTATION_FORCE_WAVE64 : 0;
+    flags |= (fp16) ? OPTICALFLOW_SHADER_PERMUTATION_ALLOW_FP16 : 0;
+    return flags;
+}
+
+// (Re)creates the compute pipelines of the optical flow context and patches their
+// resource bindings.
+//
+// Returns FFX_OK on success. Unlike before, a failing device-capability query, pipeline
+// creation or binding patch is now propagated to the caller instead of being discarded,
+// so a context is never reported as created with missing or mis-bound pipelines.
+static FfxErrorCode createPipelineStates(FfxOpticalflowContext_Private* context)
+{
+    FFX_ASSERT(context);
+
+    FfxInterface* backendInterface = &context->contextDescription.backendInterface;
+
+    constexpr size_t samplerCount = 2;
+    FfxSamplerDescription samplerDescs[samplerCount] = {
+        {FFX_FILTER_TYPE_MINMAGMIP_POINT, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE},
+        {FFX_FILTER_TYPE_MINMAGMIP_LINEAR, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_ADDRESS_MODE_CLAMP, FFX_BIND_COMPUTE_SHADER_STAGE} };
+
+    const size_t rootConstantCount = 2;
+    FfxRootConstantDescription rootConstantDescs[2] = { {sizeof(OpticalflowConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE},
+                                                        {sizeof(OpticalFlowSpdConstants) / sizeof(uint32_t), FFX_BIND_COMPUTE_SHADER_STAGE} };
+    FfxPipelineDescription pipelineDescription = {};
+    pipelineDescription.stage = FFX_BIND_COMPUTE_SHADER_STAGE;
+    pipelineDescription.contextFlags = context->contextDescription.flags;
+    pipelineDescription.samplerCount = samplerCount;
+    pipelineDescription.samplers = samplerDescs;
+    pipelineDescription.rootConstantBufferCount = rootConstantCount;
+    pipelineDescription.rootConstants = rootConstantDescs;
+
+    // The capabilities drive the permutation choice below, so do not read them
+    // when the backend reports that the query failed.
+    FfxDeviceCapabilities capabilities = {};
+    FFX_VALIDATE(backendInterface->fpGetDeviceCapabilities(backendInterface, &capabilities));
+
+    const bool haveShaderModel66 = capabilities.maximumSupportedShaderModel >= FFX_SHADER_MODEL_6_6;
+    const bool supportedFP16 = capabilities.fp16Supported;
+
+    // Wave64 can only be forced when the device reports a 32..64 lane range and
+    // supports shader model 6.6; the LUT flag follows the lane range alone.
+    const bool waveSizeSelectable = (capabilities.waveLaneCountMin == 32) && (capabilities.waveLaneCountMax == 64);
+    const bool useLut = waveSizeSelectable;
+    const bool canForceWave64 = waveSizeSelectable && haveShaderModel66;
+
+    const uint32_t contextFlags = context->contextDescription.flags;
+
+    // Releases the previous pipeline (if any), creates the pipeline for `pass` with the
+    // current pipelineDescription and patches its bindings. Returns the first failure.
+    auto CreateComputePipeline = [&](FfxPass pass, const wchar_t* name, FfxPipelineState* pipeline) -> FfxErrorCode {
+        ffxSafeReleasePipeline(backendInterface, pipeline, context->effectContextId);
+        wcscpy_s(pipelineDescription.name, name);
+        FFX_VALIDATE(backendInterface->fpCreatePipeline(
+            backendInterface,
+            FFX_EFFECT_OPTICALFLOW,
+            pass,
+            getPipelinePermutationFlags(contextFlags, pass, supportedFP16, canForceWave64, useLut),
+            &pipelineDescription,
+            context->effectContextId,
+            pipeline));
+
+        return patchResourceBindings(pipeline);
+    };
+
+    // The input pyramid pass is the only one created with both root constant buffers.
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_GENERATE_OPTICAL_FLOW_INPUT_PYRAMID, L"Opticalflow_InputPyramid", &context->pipelineGenerateOpticalFlowInputPyramid));
+    pipelineDescription.rootConstantBufferCount = 1;
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_PREPARE_LUMA, L"Opticalflow_Luma", &context->pipelinePrepareLuma));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_GENERATE_SCD_HISTOGRAM, L"Opticalflow_SCD_Histogram", &context->pipelineGenerateSCDHistogram));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_COMPUTE_SCD_DIVERGENCE, L"Opticalflow_SCD_Divergence", &context->pipelineComputeSCDDivergence));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_COMPUTE_OPTICAL_FLOW_ADVANCED_V5, L"Opticalflow_Search", &context->pipelineComputeOpticalFlowAdvancedV5));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_FILTER_OPTICAL_FLOW_V5, L"Opticalflow_Filter", &context->pipelineFilterOpticalFlowV5));
+    FFX_VALIDATE(CreateComputePipeline(FFX_OPTICALFLOW_PASS_SCALE_OPTICAL_FLOW_ADVANCED_V5, L"Opticalflow_Upscale", &context->pipelineScaleOpticalFlowAdvancedV5));
+
+    return FFX_OK;
+}
+
+constexpr uint32_t OpticalFlowMaxPyramidLevels = 7;
+constexpr uint32_t HistogramBins = 256;
+constexpr uint32_t HistogramsPerDim = 3;
+constexpr uint32_t HistogramShifts = 3;
+
+// Returns displaySize divided by opticalFlowBlockSize in each dimension, rounded up.
+static FfxDimensions2D GetOpticalFlowTextureSize(const FfxDimensions2D& displaySize, const uint32_t opticalFlowBlockSize)
+{
+    uint32_t width = (displaySize.width + opticalFlowBlockSize - 1) / opticalFlowBlockSize;
+    uint32_t height = (displaySize.height + opticalFlowBlockSize - 1) / opticalFlowBlockSize;
+    return { width, height };
+}
+
+// Returns the square histogram size for a pyramid level: 2 * maxVelocity + 1 bins per
+// dimension, where maxVelocity = 8 * 2^(OpticalFlowMaxPyramidLevels - 1 - level).
+// `level` must lie in [0, OpticalFlowMaxPyramidLevels).
+static FfxDimensions2D GetOpticalFlowHistogramSize(int level)
+{
+    // An out-of-range level would make the shift amount below invalid.
+    FFX_ASSERT(static_cast<uint32_t>(level) < OpticalFlowMaxPyramidLevels);
+
+    const uint32_t searchRadius = 8;
+    uint32_t maxVelocity = searchRadius * (1 << (OpticalFlowMaxPyramidLevels - 1 - level));
+    uint32_t binsPerDimension = 2 * maxVelocity + 1;
+    return { binsPerDimension, binsPerDimension };
+}
+
+// Returns the number of 16x16 thread groups per dimension needed to cover the histogram
+// of `level` plus one additional element per full thread group width/height.
+static FfxDimensions2D GetGlobalMotionSearchDispatchSize(int level)
+{
+    const uint32_t threadGroupSizeX = 16;
+    const uint32_t threadGroupSizeY = 16;
+    const FfxDimensions2D opticalFlowHistogramSize = GetOpticalFlowHistogramSize(level);
+    const uint32_t additionalElementsDueToShiftsX = opticalFlowHistogramSize.width / threadGroupSizeX;
+    const uint32_t additionalElementsDueToShiftsY = opticalFlowHistogramSize.height / threadGroupSizeY;
+    const uint32_t dispatchX = (opticalFlowHistogramSize.width + additionalElementsDueToShiftsX + threadGroupSizeX - 1) / threadGroupSizeX;
+    const uint32_t dispatchY = (opticalFlowHistogramSize.height + additionalElementsDueToShiftsY + threadGroupSizeY - 1) / threadGroupSizeY;
+    return { dispatchX, dispatchY };
+}
+
+// Returns HistogramBins entries for each of the HistogramsPerDim x HistogramsPerDim histograms.
+static uint32_t GetSCDHistogramTextureWidth()
+{
+    return HistogramBins * (HistogramsPerDim * HistogramsPerDim);
+}
+
+static FfxErrorCode opticalflowCreate(FfxOpticalflowContext_Private* context, const FfxOpticalflowContextDescription* contextDescription)
+{
+    FFX_ASSERT(context);
+    FFX_ASSERT(contextDescription);
+    FfxErrorCode errorCode = FFX_OK;
+
+    memset(context, 0, sizeof(FfxOpticalflowContext_Private));
+    context->device = contextDescription->backendInterface.device;
+
+    memcpy(&context->contextDescription, contextDescription, sizeof(FfxOpticalflowContextDescription));
+
+    // Check version info - make sure we are linked with the right backend version
+    FfxVersionNumber version = context->contextDescription.backendInterface.fpGetSDKVersion(&context->contextDescription.backendInterface);
+    FFX_RETURN_ON_ERROR(version == FFX_SDK_MAKE_VERSION(1, 1, 4), FFX_ERROR_INVALID_VERSION);
+
+    errorCode = context->contextDescription.backendInterface.fpCreateBackendContext(&context->contextDescription.backendInterface, FFX_EFFECT_OPTICALFLOW, nullptr, &context->effectContextId);
+    FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+    errorCode = context->contextDescription.backendInterface.fpGetDeviceCapabilities(&context->contextDescription.backendInterface, &context->deviceCapabilities);
+    FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+    context->firstExecution = true;
+    context->resourceFrameIndex = 0;
+
+    context->constants.inputLumaResolution[0] = context->contextDescription.resolution.width;
+    context->constants.inputLumaResolution[1] = context->contextDescription.resolution.height;
+
+    FfxDimensions2D opticalFlowInputTextureSize = context->contextDescription.resolution;
+
+    const FfxResourceType texture1dResourceType =
(context->contextDescription.flags & FFX_OPTICALFLOW_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D; + + uint32_t minBlockSize = 8; + const FfxDimensions2D opticalFlowTextureSize = GetOpticalFlowTextureSize(contextDescription->resolution, minBlockSize); + + const FfxDimensions2D opticalFlowLevel1TextureSize = { FFX_ALIGN_UP(opticalFlowTextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowTextureSize.height, 2) / 2 }; + const FfxDimensions2D opticalFlowLevel2TextureSize = { FFX_ALIGN_UP(opticalFlowLevel1TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel1TextureSize.height, 2) / 2 }; + const FfxDimensions2D opticalFlowLevel3TextureSize = { FFX_ALIGN_UP(opticalFlowLevel2TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel2TextureSize.height, 2) / 2 }; + const FfxDimensions2D opticalFlowLevel4TextureSize = { FFX_ALIGN_UP(opticalFlowLevel3TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel3TextureSize.height, 2) / 2 }; + const FfxDimensions2D opticalFlowLevel5TextureSize = { FFX_ALIGN_UP(opticalFlowLevel4TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel4TextureSize.height, 2) / 2 }; + const FfxDimensions2D opticalFlowLevel6TextureSize = { FFX_ALIGN_UP(opticalFlowLevel5TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel5TextureSize.height, 2) / 2 }; + const FfxDimensions2D opticalFlowLevel7TextureSize = { FFX_ALIGN_UP(opticalFlowLevel6TextureSize.width, 2) / 2, FFX_ALIGN_UP(opticalFlowLevel6TextureSize.height, 2) / 2 }; + + const FfxDimensions2D opticalFlowHistogramTextureSize = GetOpticalFlowHistogramSize(0); + + const FfxDimensions2D globalMotionSearchMaxDispatchSize = GetGlobalMotionSearchDispatchSize(0); + const uint32_t globalMotionSearchTextureWidth = 4 + (globalMotionSearchMaxDispatchSize.width * globalMotionSearchMaxDispatchSize.height); + + const FfxInternalResourceDescription internalSurfaceDesc[] = { + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1, L"OPTICALFLOW_OpticalFlowInput1", 
FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width, opticalFlowInputTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_1, L"OPTICALFLOW_OpticalFlowInput1Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 1, opticalFlowInputTextureSize.height >> 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_2, L"OPTICALFLOW_OpticalFlowInput1Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 2, opticalFlowInputTextureSize.height >> 2, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_3, L"OPTICALFLOW_OpticalFlowInput1Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 3, opticalFlowInputTextureSize.height >> 3, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_4, L"OPTICALFLOW_OpticalFlowInput1Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 4, opticalFlowInputTextureSize.height >> 4, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_5, L"OPTICALFLOW_OpticalFlowInput1Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 5, opticalFlowInputTextureSize.height >> 5, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { 
FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_6, L"OPTICALFLOW_OpticalFlowInput1Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 6, opticalFlowInputTextureSize.height >> 6, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2, L"OPTICALFLOW_OpticalFlowInput2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width, opticalFlowInputTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_1, L"OPTICALFLOW_OpticalFlowInput2Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 1, opticalFlowInputTextureSize.height >> 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_2, L"OPTICALFLOW_OpticalFlowInput2Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 2, opticalFlowInputTextureSize.height >> 2, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_3, L"OPTICALFLOW_OpticalFlowInput2Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 3, opticalFlowInputTextureSize.height >> 3, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_4, L"OPTICALFLOW_OpticalFlowInput2Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 4, opticalFlowInputTextureSize.height >> 4, 1, FFX_RESOURCE_FLAGS_NONE, 
{FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_5, L"OPTICALFLOW_OpticalFlowInput2Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 5, opticalFlowInputTextureSize.height >> 5, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_6, L"OPTICALFLOW_OpticalFlowInput2Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R8_UINT, opticalFlowInputTextureSize.width >> 6, opticalFlowInputTextureSize.height >> 6, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1, L"OPTICALFLOW_OpticalFlow1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowTextureSize.width, opticalFlowTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_1, L"OPTICALFLOW_OpticalFlow1Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel1TextureSize.width, opticalFlowLevel1TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_2, L"OPTICALFLOW_OpticalFlow1Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel2TextureSize.width, opticalFlowLevel2TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_3, L"OPTICALFLOW_OpticalFlow1Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel3TextureSize.width, opticalFlowLevel3TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, 
{FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_4, L"OPTICALFLOW_OpticalFlow1Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel4TextureSize.width, opticalFlowLevel4TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_5, L"OPTICALFLOW_OpticalFlow1Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel5TextureSize.width, opticalFlowLevel5TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_6, L"OPTICALFLOW_OpticalFlow1Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel6TextureSize.width, opticalFlowLevel6TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2, L"OPTICALFLOW_OpticalFlow2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowTextureSize.width, opticalFlowTextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_1, L"OPTICALFLOW_OpticalFlow2Level1", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel1TextureSize.width, opticalFlowLevel1TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_2, L"OPTICALFLOW_OpticalFlow2Level2", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel2TextureSize.width, opticalFlowLevel2TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { 
FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_3, L"OPTICALFLOW_OpticalFlow2Level3", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel3TextureSize.width, opticalFlowLevel3TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_4, L"OPTICALFLOW_OpticalFlow2Level4", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel4TextureSize.width, opticalFlowLevel4TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_5, L"OPTICALFLOW_OpticalFlow2Level5", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel5TextureSize.width, opticalFlowLevel5TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_6, L"OPTICALFLOW_OpticalFlow2Level6", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowLevel6TextureSize.width, opticalFlowLevel6TextureSize.height, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM, L"OPTICALFLOW_OpticalFlowSCDHistogram", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, GetSCDHistogramTextureWidth(), 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM, L"OPTICALFLOW_OpticalFlowSCDPreviousHistogram", FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_FLOAT, GetSCDHistogramTextureWidth(), 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + + { FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP, L"OPTICALFLOW_OpticalFlowSCDTemp", 
FFX_RESOURCE_TYPE_TEXTURE2D, FFX_RESOURCE_USAGE_UAV, + FFX_SURFACE_FORMAT_R32_UINT, 3, 1, 1, FFX_RESOURCE_FLAGS_NONE, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }, + }; + + memset(context->resources, 0, sizeof(context->resources)); + + for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) { + + const FfxInternalResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex]; + const FfxResourceType resourceType = currentSurfaceDescription->height > 1 ? FFX_RESOURCE_TYPE_TEXTURE2D : texture1dResourceType; + const FfxResourceDescription resourceDescription = { + resourceType, currentSurfaceDescription->format, + currentSurfaceDescription->width, currentSurfaceDescription->height, 1, + currentSurfaceDescription->mipCount, FFX_RESOURCE_FLAGS_NONE, currentSurfaceDescription->usage }; + const FfxResourceStates initialState = FFX_RESOURCE_STATE_UNORDERED_ACCESS; + const FfxCreateResourceDescription createResourceDescription = { + FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->name, currentSurfaceDescription->id, currentSurfaceDescription->initData }; + + FFX_VALIDATE(context->contextDescription.backendInterface.fpCreateResource( + &context->contextDescription.backendInterface, + &createResourceDescription, + context->effectContextId, + &context->resources[currentSurfaceDescription->id])); + } + + memset(context->srvBindings, 0, sizeof(context->srvBindings)); + memset(context->uavBindings, 0, sizeof(context->uavBindings)); + + { + context->refreshPipelineStates = false; + errorCode = createPipelineStates(context); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + } + + return FFX_OK; +} + +static FfxErrorCode opticalflowRelease(FfxOpticalflowContext_Private* context) +{ + FFX_ASSERT(context); + + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelinePrepareLuma, context->effectContextId); + 
ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGenerateOpticalFlowInputPyramid, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineGenerateSCDHistogram, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineComputeSCDDivergence, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineComputeOpticalFlowAdvancedV5, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineFilterOpticalFlowV5, context->effectContextId); + ffxSafeReleasePipeline(&context->contextDescription.backendInterface, &context->pipelineScaleOpticalFlowAdvancedV5, context->effectContextId); + + for (int32_t currentResourceIndex = 0; currentResourceIndex < FFX_OF_RESOURCE_IDENTIFIER_COUNT; ++currentResourceIndex) { + + ffxSafeReleaseResource(&context->contextDescription.backendInterface, context->resources[currentResourceIndex], context->effectContextId); + } + + context->contextDescription.backendInterface.fpDestroyBackendContext(&context->contextDescription.backendInterface, context->effectContextId); + + return FFX_OK; +} + +static void scheduleDispatch(FfxOpticalflowContext_Private* context, const FfxPipelineState* pipeline, const wchar_t* pipelineName, uint32_t dispatchX, uint32_t dispatchY, uint32_t dispatchZ = 1) +{ + FfxComputeJobDescription jobDescriptor = {}; + + for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvTextureCount; ++currentShaderResourceViewIndex) { + + const uint32_t bindingIdentifier = pipeline->srvTextureBindings[currentShaderResourceViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = context->srvBindings[bindingIdentifier]; + jobDescriptor.srvTextures[currentShaderResourceViewIndex].resource = currentResource; +#ifdef 
FFX_DEBUG + wcscpy_s(jobDescriptor.srvTextures[currentShaderResourceViewIndex].name, pipeline->srvTextureBindings[currentShaderResourceViewIndex].name); +#endif + + FFX_ASSERT(bindingIdentifier != FFX_OF_BINDING_IDENTIFIER_NULL); + FFX_ASSERT(bindingIdentifier < FFX_OF_BINDING_IDENTIFIER_COUNT); + } + + for (uint32_t currentUnorderedAccessViewIndex = 0; currentUnorderedAccessViewIndex < pipeline->uavTextureCount; ++currentUnorderedAccessViewIndex) { + + const uint32_t bindingIdentifier = pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].resourceIdentifier; + const FfxResourceInternal currentResource = context->uavBindings[bindingIdentifier]; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].resource = currentResource; + jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].mip = 0; +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.uavTextures[currentUnorderedAccessViewIndex].name, pipeline->uavTextureBindings[currentUnorderedAccessViewIndex].name); +#endif + + FFX_ASSERT(bindingIdentifier != FFX_OF_BINDING_IDENTIFIER_NULL); + FFX_ASSERT(bindingIdentifier < FFX_OF_BINDING_IDENTIFIER_COUNT); + } + + jobDescriptor.dimensions[0] = dispatchX; + jobDescriptor.dimensions[1] = dispatchY; + jobDescriptor.dimensions[2] = dispatchZ; + jobDescriptor.pipeline = *pipeline; + + for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) { +#ifdef FFX_DEBUG + wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->constantBufferBindings[currentRootConstantIndex].name); +#endif + jobDescriptor.cbs[currentRootConstantIndex] = context->constantBuffers[pipeline->constantBufferBindings[currentRootConstantIndex].resourceIdentifier]; + } + + FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE }; + wcscpy_s(dispatchJob.jobLabel, pipelineName); + dispatchJob.computeJobDescriptor = jobDescriptor; + + 
context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &dispatchJob); +} + +static FfxErrorCode dispatch(FfxOpticalflowContext_Private* context, const FfxOpticalflowDispatchDescription* params) +{ + context->contextDescription.backendInterface.fpRegisterResource( + &context->contextDescription.backendInterface, + &params->opticalFlowVector, + context->effectContextId, + &context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_VECTOR]); + context->contextDescription.backendInterface.fpRegisterResource( + &context->contextDescription.backendInterface, + &params->opticalFlowSCD, + context->effectContextId, + &context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT]); + + context->contextDescription.backendInterface.fpRegisterResource( + &context->contextDescription.backendInterface, + &params->color, + context->effectContextId, + &context->srvBindings[FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR]); + + FfxCommandList commandList = params->commandList; + int advancedAlgorithmIterations = 7; + uint32_t opticalFlowBlockSize = 8; + + if (context->refreshPipelineStates) { + + context->refreshPipelineStates = false; + + const FfxErrorCode errorCode = createPipelineStates(context); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + } + + const FfxResourceDescription resourceDescInputColor = context->contextDescription.backendInterface.fpGetResourceDescription( + &context->contextDescription.backendInterface, + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR]); + FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D); + + context->constants.backbufferTransferFunction = params->backbufferTransferFunction; + context->constants.minMaxLuminance[0] = params->minMaxLuminance.x; + context->constants.minMaxLuminance[1] = params->minMaxLuminance.y; + + const bool resetAccumulation = params->reset || context->firstExecution; + context->firstExecution = false; + + if (resetAccumulation) 
{ + context->constants.frameIndex = 0; + } + else { + context->constants.frameIndex++; + } + + if (resetAccumulation) + { + const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f }; + FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT }; + memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float)); + + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow SCD Temp"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + clearJob.clearJobDescriptor.target = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow SCD Histogram"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow SCD Previous histogram"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1 Level 1"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_1]; + 
context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1 Level 2"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_2]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1 Level 3"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_3]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1 Level 4"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_4]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1 Level 5"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_5]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 1 Level 6"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_6]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 2"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear 
Optical Flow Input 2 Level 1"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_1]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 2 Level 2"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_2]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 2 Level 3"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_3]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 2 Level 4"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_4]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 2 Level 5"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_5]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + wcscpy_s(clearJob.jobLabel, L"Clear Optical Flow Input 2 Level 6"); + clearJob.clearJobDescriptor.target = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_6]; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &clearJob); + } + + uint32_t resolutionMultiplier = 1; + + FfxUInt32x2 threadGroupSizeOpticalFlowInputPyramid; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 numWorkGroupsAndMips; + FfxUInt32x4 
rectInfo = { 0, 0, + context->contextDescription.resolution.width * resolutionMultiplier, + context->contextDescription.resolution.height * resolutionMultiplier }; + ffxSpdSetup(threadGroupSizeOpticalFlowInputPyramid, workGroupOffset, numWorkGroupsAndMips, rectInfo, 4); + + OpticalFlowSpdConstants luminancePyramidConstants; + luminancePyramidConstants.numworkGroups = numWorkGroupsAndMips[0]; + luminancePyramidConstants.mips = numWorkGroupsAndMips[1]; + luminancePyramidConstants.workGroupOffset[0] = workGroupOffset[0]; + luminancePyramidConstants.workGroupOffset[1] = workGroupOffset[1]; + luminancePyramidConstants.numworkGroupsOpticalFlowInputPyramid = numWorkGroupsAndMips[0]; + + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &context->constants, sizeof(context->constants), &context->constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER]); + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &luminancePyramidConstants, sizeof(luminancePyramidConstants), &context->constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER_SPD]); + + { + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM] = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM] = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP] = context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT] = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT]; + + const bool isOddFrame = !!(context->resourceFrameIndex & 1); + + uint32_t opticalFlowInputResourceIndex = isOddFrame ? 
FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1; + uint32_t opticalFlowPreviousInputResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2; + + uint32_t opticalFlowResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1; + uint32_t opticalFlowPreviousResourceIndex = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2; + + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT] = context->resources[opticalFlowInputResourceIndex]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_1] = context->resources[opticalFlowInputResourceIndex + 1]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_2] = context->resources[opticalFlowInputResourceIndex + 2]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_3] = context->resources[opticalFlowInputResourceIndex + 3]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_4] = context->resources[opticalFlowInputResourceIndex + 4]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_5] = context->resources[opticalFlowInputResourceIndex + 5]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_6] = context->resources[opticalFlowInputResourceIndex + 6]; + + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT] = context->resources[opticalFlowInputResourceIndex]; + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT] = context->resources[opticalFlowPreviousInputResourceIndex]; + + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndex]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndex]; + 
context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS] = context->resources[opticalFlowPreviousResourceIndex]; + + { + int32_t threadGroupSizeX = 16; + int32_t threadGroupSizeY = 16; + uint32_t threadPixelsX = 2; + uint32_t threadPixelsY = 2; + int32_t dispatchX = ((context->contextDescription.resolution.width + (threadPixelsX - 1)) / threadPixelsX + (threadGroupSizeX - 1)) / threadGroupSizeX; + int32_t dispatchY = ((context->contextDescription.resolution.height + (threadPixelsY - 1)) / threadPixelsY + (threadGroupSizeY - 1)) / threadGroupSizeY; + scheduleDispatch(context, &context->pipelinePrepareLuma, L"OF PrepareLuma", dispatchX, dispatchY); + } + + { + { + scheduleDispatch(context, + &context->pipelineGenerateOpticalFlowInputPyramid, + L"OF GenerateOpticalFlowInputPyramid", + threadGroupSizeOpticalFlowInputPyramid[0], + threadGroupSizeOpticalFlowInputPyramid[1] + ); + } + + { + { + const uint32_t threadGroupSizeX = 32; + const uint32_t threadGroupSizeY = 8; + const uint32_t strataWidth = (context->contextDescription.resolution.width / 4) / HistogramsPerDim; + const uint32_t strataHeight = context->contextDescription.resolution.height / HistogramsPerDim; + const uint32_t dispatchX = (strataWidth + threadGroupSizeX - 1) / threadGroupSizeX; + const uint32_t dispatchY = 16; + const uint32_t dispatchZ = HistogramsPerDim * HistogramsPerDim; + scheduleDispatch(context, &context->pipelineGenerateSCDHistogram, L"OF GenerateSCDHistogram", dispatchX, dispatchY, dispatchZ); + } + { + const uint32_t dispatchX = HistogramsPerDim * HistogramsPerDim; + const uint32_t dispatchY = HistogramShifts; + scheduleDispatch(context, &context->pipelineComputeSCDDivergence, L"OF ComputeSCDDivergence", dispatchX, dispatchY); + } + } + + FfxDimensions2D opticalFlowTextureSizes[OpticalFlowMaxPyramidLevels]; + const int pyramidMaxIterations = advancedAlgorithmIterations; + FFX_ASSERT(pyramidMaxIterations <= OpticalFlowMaxPyramidLevels); + + opticalFlowTextureSizes[0] = 
GetOpticalFlowTextureSize(context->contextDescription.resolution, opticalFlowBlockSize); + for (int i = 1; i < pyramidMaxIterations; i++) + { + opticalFlowTextureSizes[i] = { + (opticalFlowTextureSizes[i - 1].width + 1) / 2, + (opticalFlowTextureSizes[i - 1].height + 1) / 2 + }; + } + + for (int level = pyramidMaxIterations - 1; level >= 0; level--) + { + bool isOddLevel = !!(level & 1); + + uint32_t opticalFlowInputResourceIndexA = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1; + uint32_t opticalFlowInputResourceIndexB = isOddFrame ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2; + uint32_t opticalFlowResourceIndexA = (isOddFrame != isOddLevel) ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1; + uint32_t opticalFlowResourceIndexB = (isOddFrame != isOddLevel) ? FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1 : FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2; + context->constants.opticalFlowPyramidLevel = level; + context->constants.opticalFlowPyramidLevelCount = pyramidMaxIterations; + + context->contextDescription.backendInterface.fpStageConstantBufferDataFunc(&context->contextDescription.backendInterface, &context->constants, sizeof(context->constants), &context->constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER]); + + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT] = context->resources[opticalFlowInputResourceIndexA + level]; + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT] = context->resources[opticalFlowInputResourceIndexB + level]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndexA + level]; + + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS] = context->resources[opticalFlowResourceIndexB + level]; + + { + const FfxUInt32 inputLumaWidth = 
ffxMax(context->contextDescription.resolution.width >> level, 1); + const FfxUInt32 inputLumaHeight = ffxMax(context->contextDescription.resolution.height >> level, 1); + std::wstring pipelineName = L"OF " + std::to_wstring(level) + L" Search"; + + { + uint32_t threadPixels = 4; + FFX_ASSERT(opticalFlowBlockSize >= threadPixels); + uint32_t threadGroupSizeY = 16; + uint32_t threadGroupSize = 64; + uint32_t dispatchX = ((inputLumaWidth + threadPixels - 1) / threadPixels * threadGroupSizeY + (threadGroupSize - 1)) / threadGroupSize; + uint32_t dispatchY = (inputLumaHeight + (threadGroupSizeY - 1)) / threadGroupSizeY; + scheduleDispatch(context, &context->pipelineComputeOpticalFlowAdvancedV5, pipelineName.c_str(), dispatchX, dispatchY); + } + } + + { + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS] = context->resources[opticalFlowResourceIndexA + level]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndexB + level]; + } + + { + if (level == 0) + { + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_VECTOR]; + } + + const uint32_t levelWidth = opticalFlowTextureSizes[level].width; + const uint32_t levelHeight = opticalFlowTextureSizes[level].height; + + const uint32_t threadGroupSizeX = 16; + const uint32_t threadGroupSizeY = 4; + const uint32_t dispatchX = (levelWidth + threadGroupSizeX - 1) / threadGroupSizeX; + const uint32_t dispatchY = (levelHeight + threadGroupSizeY - 1) / threadGroupSizeY; + std::wstring pipelineName = L"OF " + std::to_wstring(level) + L" Filter"; + + { + scheduleDispatch(context, &context->pipelineFilterOpticalFlowV5, pipelineName.c_str(), dispatchX, dispatchY); + } + } + + if (level > 0) + { + context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_1 + level - 1] = context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW]; + } + + if (level > 0) + { + { + 
context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW] = context->resources[opticalFlowResourceIndexB + level]; + context->uavBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_NEXT_LEVEL] = level > 0 ? context->resources[opticalFlowResourceIndexB + level - 1] : FfxResourceInternal{ FFX_OF_RESOURCE_IDENTIFIER_NULL }; + } + + FFX_ASSERT(opticalFlowBlockSize >= 2); + const uint32_t nextLevelWidth = opticalFlowTextureSizes[level - 1].width; + const uint32_t nextLevelHeight = opticalFlowTextureSizes[level - 1].height; + + const uint32_t threadGroupSizeX = opticalFlowBlockSize / 2; + const uint32_t threadGroupSizeY = opticalFlowBlockSize / 2; + const uint32_t threadGroupSizeZ = 4; + const uint32_t dispatchX = (nextLevelWidth + threadGroupSizeX - 1) / threadGroupSizeX; + const uint32_t dispatchY = (nextLevelHeight + threadGroupSizeY - 1) / threadGroupSizeY; + const uint32_t dispatchZ = 1; + std::wstring pipelineName = L"OF " + std::to_wstring(level) + L" Scale"; + + { + const uint32_t dispatchX = (nextLevelWidth + 3) / 4; + const uint32_t dispatchY = (nextLevelHeight + 3) / 4; + scheduleDispatch(context, &context->pipelineScaleOpticalFlowAdvancedV5, pipelineName.c_str(), dispatchX, dispatchY, dispatchZ); + } + + { + FfxGpuJobDescription barrierJob = {FFX_GPU_JOB_BARRIER}; + barrierJob.barrierDescriptor = { context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + } + } + + { + FfxGpuJobDescription barrierJob = {FFX_GPU_JOB_BARRIER}; + barrierJob.barrierDescriptor = { context->srvBindings[FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + 
context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + } + } + } + } + + { + FfxGpuJobDescription barrierJob = {FFX_GPU_JOB_BARRIER}; + + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1 Level 1"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_1], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1 Level 2"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_2], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1 Level 3"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_3], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1 Level 4"); + barrierJob.barrierDescriptor = { 
context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_4], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1 Level 5"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_5], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 1 Level 6"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_6], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 2"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 2 Level 1"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_1], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, 
L"Transition Optical Flow Input 2 Level 2"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_2], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 2 Level 3"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_3], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 2 Level 4"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_4], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 2 Level 5"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_5], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + wcscpy_s(barrierJob.jobLabel, L"Transition Optical Flow Input 2 Level 6"); + barrierJob.barrierDescriptor = { context->resources[FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_6], FFX_BARRIER_TYPE_TRANSITION, FFX_RESOURCE_STATE_COMPUTE_READ, FFX_RESOURCE_STATE_UNORDERED_ACCESS, 0}; + 
context->contextDescription.backendInterface.fpScheduleGpuJob(&context->contextDescription.backendInterface, &barrierJob); + } + + context->resourceFrameIndex = (context->resourceFrameIndex + 1) % FFX_OPTICALFLOW_MAX_QUEUED_FRAMES; + + FFX_VALIDATE(context->contextDescription.backendInterface.fpExecuteGpuJobs(&context->contextDescription.backendInterface, commandList, context->effectContextId)); + + context->contextDescription.backendInterface.fpUnregisterResources(&context->contextDescription.backendInterface, commandList, context->effectContextId); + + return FFX_OK; +} + +/* Creates an OpticalFlow context: checks both pointers (FFX_ERROR_INVALID_POINTER) and the backend interface (FFX_ERROR_INCOMPLETE_INTERFACE), then returns the result of opticalflowCreate(). */ +FfxErrorCode ffxOpticalflowContextCreate(FfxOpticalflowContext* context, FfxOpticalflowContextDescription* contextDescription) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(contextDescription, FFX_ERROR_INVALID_POINTER); + + /* Only these four backend callbacks are checked for presence here. */ + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetSDKVersion, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE); + + /* A supplied scratch buffer must come with a non-zero size. */ + if (contextDescription->backendInterface.scratchBuffer) { + + FFX_RETURN_ON_ERROR(contextDescription->backendInterface.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE); + } + + /* The opaque public context is reinterpreted as the private layout below, so it must be at least as large. */ + FFX_STATIC_ASSERT(sizeof(FfxOpticalflowContext) >= sizeof(FfxOpticalflowContext_Private)); + + FfxOpticalflowContext_Private* contextPrivate = (FfxOpticalflowContext_Private*)(context); + FfxErrorCode errorCode = opticalflowCreate(contextPrivate, contextDescription); + + return errorCode; +} + +/* Queries the backend (fpGetEffectGpuMemoryUsage) for the GPU memory used by this effect context and writes it to vramUsage. Checks both pointers (FFX_ERROR_INVALID_POINTER) and that the context has a device (FFX_ERROR_NULL_DEVICE); a backend error code is passed through. */ +FFX_API FfxErrorCode ffxOpticalflowContextGetGpuMemoryUsage(FfxOpticalflowContext* context, FfxEffectMemoryUsage* vramUsage) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); +
FFX_RETURN_ON_ERROR(vramUsage, FFX_ERROR_INVALID_POINTER); + FfxOpticalflowContext_Private* contextPrivate = (FfxOpticalflowContext_Private*)(context); + + FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); + + FfxErrorCode errorCode = contextPrivate->contextDescription.backendInterface.fpGetEffectGpuMemoryUsage( + &contextPrivate->contextDescription.backendInterface, contextPrivate->effectContextId, vramUsage); + FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode); + + return FFX_OK; +} + +/* Destroys the context by forwarding to opticalflowRelease() and returning its error code; the context pointer is checked first (FFX_ERROR_INVALID_POINTER). */ +FfxErrorCode ffxOpticalflowContextDestroy(FfxOpticalflowContext* context) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + + FfxOpticalflowContext_Private* contextPrivate = (FfxOpticalflowContext_Private*)(context); + const FfxErrorCode errorCode = opticalflowRelease(contextPrivate); + + return errorCode; +} + +/* Returns true when the FfxResource carries no underlying resource handle (resource.resource is NULL). */ +FFX_API bool ffxOpticalflowResourceIsNull(FfxResource resource) +{ + return resource.resource == NULL; +} + +/* Fills SharedResources with the creation descriptions of the opticalFlowVector texture (R16G16_SINT, sized by GetOpticalFlowTextureSize(resolution, 8)) and the opticalFlowSCD texture (R32_UINT, 3x1). Both are UAV 2D textures described in the UNORDERED_ACCESS state. */ +FFX_API FfxErrorCode ffxOpticalflowGetSharedResourceDescriptions(FfxOpticalflowContext* context, FfxOpticalflowSharedResourceDescriptions* SharedResources) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(SharedResources, FFX_ERROR_INVALID_POINTER); + + FfxOpticalflowContext_Private* contextPrivate = (FfxOpticalflowContext_Private*)(context); + const FfxDimensions2D opticalFlowTextureSize = GetOpticalFlowTextureSize(contextPrivate->contextDescription.resolution, 8); + /* NOTE(review): the two globalMotionSearch* locals below are computed but never used in this function. */ + const FfxDimensions2D globalMotionSearchMaxDispatchSize = GetGlobalMotionSearchDispatchSize(0); + const uint32_t globalMotionSearchTextureWidth = 4 /* predefined slots */ + (globalMotionSearchMaxDispatchSize.width * globalMotionSearchMaxDispatchSize.height); + + SharedResources->opticalFlowVector = { + FFX_HEAP_TYPE_DEFAULT, + { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R16G16_SINT, opticalFlowTextureSize.width, opticalFlowTextureSize.height, 1, 1, FFX_RESOURCE_FLAGS_NONE, FFX_RESOURCE_USAGE_UAV }, +
FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"OPTICALFLOW_Result", 0, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + + SharedResources->opticalFlowSCD = { + FFX_HEAP_TYPE_DEFAULT, + { FFX_RESOURCE_TYPE_TEXTURE2D, FFX_SURFACE_FORMAT_R32_UINT, 3, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, FFX_RESOURCE_USAGE_UAV }, + FFX_RESOURCE_STATE_UNORDERED_ACCESS, L"OPTICALFLOW_SCDOutput", 0, {FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED} }; + + return FFX_OK; +} + +/* Public dispatch entry point: validates the dispatch description and forwards to the internal dispatch(). Checked here: context, dispatchParams and command list are non-NULL; color, opticalFlowVector and opticalFlowSCD are non-null resources; color is a 2D texture no larger than the resolution the context was created with; the context has a device. */ +FfxErrorCode ffxOpticalflowContextDispatch(FfxOpticalflowContext* context, const FfxOpticalflowDispatchDescription* dispatchParams) +{ + FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(dispatchParams->commandList, FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(!ffxOpticalflowResourceIsNull(dispatchParams->color), FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(dispatchParams->color.description.type == FFX_RESOURCE_TYPE_TEXTURE2D, FFX_ERROR_INVALID_ARGUMENT); + FFX_RETURN_ON_ERROR(!ffxOpticalflowResourceIsNull(dispatchParams->opticalFlowVector), FFX_ERROR_INVALID_POINTER); + FFX_RETURN_ON_ERROR(!ffxOpticalflowResourceIsNull(dispatchParams->opticalFlowSCD), FFX_ERROR_INVALID_POINTER); + + FfxOpticalflowContext_Private* contextPrivate = (FfxOpticalflowContext_Private*)(context); + + FFX_RETURN_ON_ERROR(contextPrivate->device, FFX_ERROR_NULL_DEVICE); + FFX_RETURN_ON_ERROR(dispatchParams->color.description.width <= contextPrivate->contextDescription.resolution.width, FFX_ERROR_INVALID_ARGUMENT); + FFX_RETURN_ON_ERROR(dispatchParams->color.description.height <= contextPrivate->contextDescription.resolution.height, FFX_ERROR_INVALID_ARGUMENT); + + const FfxErrorCode errorCode = dispatch(contextPrivate, dispatchParams); + return errorCode; +} + +/* Returns the OpticalFlow effect version packed by FFX_SDK_MAKE_VERSION from the FFX_OPTICALFLOW_VERSION_MAJOR/MINOR/PATCH macros. */ +FFX_API FfxVersionNumber ffxOpticalflowGetEffectVersion() +{ + return FFX_SDK_MAKE_VERSION(FFX_OPTICALFLOW_VERSION_MAJOR, FFX_OPTICALFLOW_VERSION_MINOR, FFX_OPTICALFLOW_VERSION_PATCH); +} diff --git
a/thirdparty/amd-ffx/ffx_opticalflow.h b/thirdparty/amd-ffx/ffx_opticalflow.h new file mode 100644 index 000000000000..772bf7c71243 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_opticalflow.h @@ -0,0 +1,212 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// @defgroup OpticalFlow + +#pragma once + +// Include the interface for the backend of the OpticalFlow API. +#include "ffx_interface.h" + +/// FidelityFX OpticalFlow major version. +/// +/// @ingroup ffxOpticalflow +#define FFX_OPTICALFLOW_VERSION_MAJOR (1) + +/// FidelityFX OpticalFlow minor version. +/// +/// @ingroup ffxOpticalflow +#define FFX_OPTICALFLOW_VERSION_MINOR (1) + +/// FidelityFX OpticalFlow patch version. 
+/// +/// @ingroup ffxOpticalflow +#define FFX_OPTICALFLOW_VERSION_PATCH (2) + +/// FidelityFX Optical Flow context count +/// +/// Defines the number of internal effect contexts required by Optical Flow +/// +/// @ingroup ffxOpticalflow +#define FFX_OPTICALFLOW_CONTEXT_COUNT (1) + +/// The size of the context specified in 32bit size units. +/// +/// @ingroup ffxOpticalflow +#define FFX_OPTICALFLOW_CONTEXT_SIZE (FFX_SDK_DEFAULT_CONTEXT_SIZE) + +#if defined(__cplusplus) +extern "C" { +#endif // #if defined(__cplusplus) + +/// An enumeration of all the passes which constitute the OpticalFlow algorithm. +/// +/// @ingroup ffxOpticalflow +typedef enum FfxOpticalflowPass +{ + FFX_OPTICALFLOW_PASS_PREPARE_LUMA = 0, + FFX_OPTICALFLOW_PASS_GENERATE_OPTICAL_FLOW_INPUT_PYRAMID, + FFX_OPTICALFLOW_PASS_GENERATE_SCD_HISTOGRAM, + FFX_OPTICALFLOW_PASS_COMPUTE_SCD_DIVERGENCE, + FFX_OPTICALFLOW_PASS_COMPUTE_OPTICAL_FLOW_ADVANCED_V5, + FFX_OPTICALFLOW_PASS_FILTER_OPTICAL_FLOW_V5, + FFX_OPTICALFLOW_PASS_SCALE_OPTICAL_FLOW_ADVANCED_V5, + + FFX_OPTICALFLOW_PASS_COUNT +} FfxOpticalflowPass; + +/// An enumeration of bit flags used when creating a +/// FfxOpticalflowContext. See FfxOpticalflowContextDescription. +/// +/// @ingroup ffxOpticalflow +typedef enum FfxOpticalflowInitializationFlagBits +{ + FFX_OPTICALFLOW_ENABLE_TEXTURE1D_USAGE = (1 << 0), + +} FfxOpticalflowInitializationFlagBits; + +/// A structure encapsulating the parameters required to initialize +/// FidelityFX OpticalFlow. +/// +/// @ingroup ffxOpticalflow +typedef struct FfxOpticalflowContextDescription { + + FfxInterface backendInterface; ///< A set of pointers to the backend implementation for FidelityFX SDK + uint32_t flags; ///< A collection of FfxOpticalflowInitializationFlagBits. + FfxDimensions2D resolution; ///< Resolution the context is created for; ffxOpticalflowContextDispatch rejects a color input wider or taller than this. +} FfxOpticalflowContextDescription; + +/// A structure encapsulating the parameters for dispatching the various passes +/// of FidelityFX Opticalflow.
+/// +/// @ingroup ffxOpticalflow +typedef struct FfxOpticalflowDispatchDescription +{ + FfxCommandList commandList; ///< The FfxCommandList to record rendering commands into. + FfxResource color; ///< A FfxResource containing the input color buffer + FfxResource opticalFlowVector; ///< A FfxResource containing the output motion buffer + FfxResource opticalFlowSCD; ///< A FfxResource containing the output scene change detection buffer + bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously. + int backbufferTransferFunction; + FfxFloatCoords2D minMaxLuminance; +} FfxOpticalflowDispatchDescription; + +/// A structure holding the creation descriptions of the shared resources, +/// as filled in by ffxOpticalflowGetSharedResourceDescriptions. +/// +/// @ingroup ffxOpticalflow +typedef struct FfxOpticalflowSharedResourceDescriptions { + + FfxCreateResourceDescription opticalFlowVector; + FfxCreateResourceDescription opticalFlowSCD; + +} FfxOpticalflowSharedResourceDescriptions; + +/// A structure encapsulating the FidelityFX OpticalFlow context. +/// +/// This sets up an object which contains all persistent internal data and +/// resources that are required by OpticalFlow. +/// +/// The FfxOpticalflowContext object should have a lifetime matching +/// your use of OpticalFlow. Before destroying the OpticalFlow context care should be taken +/// to ensure the GPU is not accessing the resources created or used by OpticalFlow. +/// It is therefore recommended that the GPU is idle before destroying the +/// OpticalFlow context. +/// +/// @ingroup ffxOpticalflow +typedef struct FfxOpticalflowContext +{ + uint32_t data[FFX_OPTICALFLOW_CONTEXT_SIZE]; ///< An opaque set of uint32_t which contain the data for the context. +} FfxOpticalflowContext; + + +/// Create a FidelityFX OpticalFlow context from the parameters +/// programmed to the FfxOpticalflowContextDescription structure. +/// +/// The context structure is the main object used to interact with the OpticalFlow +/// API, and is responsible for the management of the internal resources used +/// by the OpticalFlow algorithm.
When this API is called, multiple calls will be +/// made via the function pointers contained in the backendInterface structure. +/// These callbacks will attempt to retrieve the device capabilities, and +/// create the internal resources, and pipelines required by OpticalFlow's +/// frame-to-frame function. Depending on the precise configuration used when +/// creating the FfxOpticalflowContext a different set of resources and +/// pipelines might be requested via the callback functions. +/// +/// The flags included in the flags field of +/// FfxOpticalflowContextDescription should match the configuration of your +/// application as well as the intended use of OpticalFlow. It is important that these +/// flags are set correctly (as well as a correctly programmed +/// FfxOpticalflowContextDescription) to ensure correct operation. It is +/// recommended to consult the overview documentation for further details on +/// how OpticalFlow should be integrated into an application. +/// +/// When the FfxOpticalflowContext is created, you should use the +/// ffxOpticalflowContextDispatch function each frame where optical flow +/// should be computed. See the documentation of +/// ffxOpticalflowContextDispatch for more details. +/// +/// The FfxOpticalflowContext should be destroyed when use of it is +/// completed, typically when an application is unloaded or OpticalFlow is +/// disabled by a user. To destroy the OpticalFlow context you should call +/// ffxOpticalflowContextDestroy. +/// +/// @param [out] context A pointer to a FfxOpticalflowContext structure to populate. +/// @param [in] contextDescription A pointer to a FfxOpticalflowContextDescription structure. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER The operation failed because either context or contextDescription was NULL. +/// @retval +/// FFX_ERROR_INCOMPLETE_INTERFACE The operation failed because the FfxOpticalflowContextDescription.backendInterface was not fully specified.
+/// @retval +/// FFX_ERROR_BACKEND_API_ERROR The operation failed because of an error returned from the backend. +/// +/// @ingroup ffxOpticalflow +FFX_API FfxErrorCode ffxOpticalflowContextCreate(FfxOpticalflowContext* context, FfxOpticalflowContextDescription* contextDescription); + +/// Query the GPU memory usage of the OpticalFlow context. +/// +/// @ingroup ffxOpticalflow +FFX_API FfxErrorCode ffxOpticalflowContextGetGpuMemoryUsage(FfxOpticalflowContext* pContext, FfxEffectMemoryUsage* vramUsage); + +/// Fill in the creation descriptions of the shared optical flow vector and +/// scene change detection resources. +/// +/// @ingroup ffxOpticalflow +FFX_API FfxErrorCode ffxOpticalflowGetSharedResourceDescriptions(FfxOpticalflowContext* context, FfxOpticalflowSharedResourceDescriptions* SharedResources); + +/// Dispatch the passes of FidelityFX OpticalFlow using the parameters in +/// the FfxOpticalflowDispatchDescription structure. +/// +/// @ingroup ffxOpticalflow +FFX_API FfxErrorCode ffxOpticalflowContextDispatch(FfxOpticalflowContext* context, const FfxOpticalflowDispatchDescription* dispatchDescription); + +/// Destroy the FidelityFX OpticalFlow context. +/// +/// @param [out] context A pointer to a FfxOpticalflowContext structure to destroy. +/// +/// @retval +/// FFX_OK The operation completed successfully. +/// @retval +/// FFX_ERROR_INVALID_POINTER The operation failed because context was NULL. +/// +/// @ingroup ffxOpticalflow +FFX_API FfxErrorCode ffxOpticalflowContextDestroy(FfxOpticalflowContext* context); + +/// Queries the effect version number. +/// +/// @returns +/// The OpticalFlow effect version (major, minor, patch). +/// +/// @ingroup ffxOpticalflow +FFX_API FfxVersionNumber ffxOpticalflowGetEffectVersion(); + +#if defined(__cplusplus) +} +#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-ffx/ffx_opticalflow_private.h b/thirdparty/amd-ffx/ffx_opticalflow_private.h new file mode 100644 index 000000000000..a325288755d9 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_opticalflow_private.h @@ -0,0 +1,108 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#pragma once + +#define FFX_CPU +#include "gpu/opticalflow/ffx_opticalflow_resources.h" + +/** Binding identifiers of the OpticalFlow effect. FFX_OF_BINDING_IDENTIFIER_COUNT sizes the srvBindings and uavBindings arrays of FfxOpticalflowContext_Private. */ +typedef enum OpticalFlowBindingIdentifiers +{ + FFX_OF_BINDING_IDENTIFIER_NULL = 0, + FFX_OF_BINDING_IDENTIFIER_INPUT_COLOR, + + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_1, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_2, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_3, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_4, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_5, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_INPUT_LEVEL_6, + + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS_INPUT, + + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT, + + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_NEXT_LEVEL, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_PREVIOUS, + + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_1, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_2, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_3, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_4, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_5, + FFX_OF_BINDING_IDENTIFIER_OPTICAL_FLOW_ALIAS_LEVEL_6, + + FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_VECTOR, + FFX_OF_BINDING_IDENTIFIER_SHARED_OPTICAL_FLOW_SCD_OUTPUT, + + FFX_OF_BINDING_IDENTIFIER_COUNT +} OpticalFlowBindingIdentifiers; + +/** Bit flags describing a shader permutation. NOTE(review): OPTICALFLOW_HDR_COLOR_INPUT is undocumented and lacks the SHADER_PERMUTATION prefix of its siblings - presumably it selects an HDR color input variant; confirm against the shader selection code. */ +typedef enum OpticalflowShaderPermutationOptions +{ + OPTICALFLOW_SHADER_PERMUTATION_FORCE_WAVE64 = (1 << 0), ///< doesn't map to a define, selects different table + OPTICALFLOW_SHADER_PERMUTATION_ALLOW_FP16 = (1 << 1), ///< Enables fast math computations where possible + OPTICALFLOW_HDR_COLOR_INPUT = (1 << 2), +} OpticalflowShaderPermutationOptions; + +typedef struct OpticalflowConstants +{ + int32_t inputLumaResolution[2]; +
uint32_t opticalFlowPyramidLevel; + uint32_t opticalFlowPyramidLevelCount; + + int32_t frameIndex; + uint32_t backbufferTransferFunction; + float minMaxLuminance[2]; +} OpticalflowConstants; /* NOTE(review): presumably mirrors the shader-side constant layout, so field order and types should stay in sync with the shaders - confirm. */ + +/** Private layout of the OpticalFlow context. The public API functions cast the opaque FfxOpticalflowContext to this type, and a static assert in ffxOpticalflowContextCreate requires sizeof(FfxOpticalflowContext) to be at least sizeof(FfxOpticalflowContext_Private). */ +typedef struct FfxOpticalflowContext_Private +{ + FfxOpticalflowContextDescription contextDescription; + FfxUInt32 effectContextId; + OpticalflowConstants constants; + FfxDevice device; + FfxDeviceCapabilities deviceCapabilities; + + FfxPipelineState pipelinePrepareLuma; + FfxPipelineState pipelineGenerateOpticalFlowInputPyramid; + FfxPipelineState pipelineGenerateSCDHistogram; + FfxPipelineState pipelineComputeSCDDivergence; + FfxPipelineState pipelineComputeOpticalFlowAdvancedV5; + FfxPipelineState pipelineFilterOpticalFlowV5; + FfxPipelineState pipelineScaleOpticalFlowAdvancedV5; + + FfxResourceInternal resources[FFX_OF_RESOURCE_IDENTIFIER_COUNT]; + FfxResourceInternal srvBindings[FFX_OF_BINDING_IDENTIFIER_COUNT]; + FfxResourceInternal uavBindings[FFX_OF_BINDING_IDENTIFIER_COUNT]; + + FfxConstantBuffer constantBuffers[FFX_OPTICALFLOW_CONSTANTBUFFER_COUNT]; + + bool firstExecution; + bool refreshPipelineStates; + uint32_t resourceFrameIndex; /* advanced modulo FFX_OPTICALFLOW_MAX_QUEUED_FRAMES at the end of each dispatch */ +} FfxOpticalflowContext_Private; diff --git a/thirdparty/amd-ffx/ffx_types.h b/thirdparty/amd-ffx/ffx_types.h new file mode 100644 index 000000000000..dfd6ef8e045f --- /dev/null +++ b/thirdparty/amd-ffx/ffx_types.h @@ -0,0 +1,1319 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include + +/// +/// @defgroup ffxSDK SDK +/// The SDK module provides detailed descriptions of the various class, structs, and function which comprise the FidelityFX SDK. It is divided into several sub-modules. +/// + +/// @defgroup ffxHost Host +/// The FidelityFX SDK host (CPU-side) references +/// +/// @ingroup ffxSDK + +/// @defgroup Defines Defines +/// Top level defines used by the FidelityFX SDK +/// +/// @ingroup ffxHost + +// When defining custom mutex you have to also define: +// FFX_MUTEX_LOCK - for exclusive locking of mutex +// FFX_MUTEX_LOCK_SHARED - for shared locking of mutex +// FFX_MUTEX_UNLOCK - for exclusive unlocking of mutex +// FFX_MUTEX_UNLOCK_SHARED - for shared unlocking of mutex +// +// If your mutex type doesn't support shared locking mechanism you can rely +// on exclusive locks only (define _SHARED variants to the same exclusive operation). 
+#ifndef FFX_MUTEX +#if __cplusplus >= 201703L +#include +/// FidelityFX mutex wrapper. +/// +/// @ingroup SDKTypes +#define FFX_MUTEX std::shared_mutex +#define FFX_MUTEX_IMPL_SHARED +#else +#include +/// FidelityFX mutex wrapper. +/// +/// @ingroup SDKTypes +#define FFX_MUTEX std::mutex +#define FFX_MUTEX_IMPL_STANDARD +#endif // #if __cplusplus >= 201703L +#endif // #ifndef FFX_MUTEX + +#if defined(FFX_GCC) || !defined(FFX_BUILD_AS_DLL) +/// FidelityFX exported functions +/// +/// @ingroup Defines +#define FFX_API +#else +/// FidelityFX exported functions +/// +/// @ingroup Defines +#define FFX_API __declspec(dllexport) +#endif // #if defined(FFX_GCC) || !defined(FFX_BUILD_AS_DLL) + +// GODOT BEGINS +// On non-Windows platforms `wchar_t` is 32 bits rather than 16 bits, +// so we increased the size of the context by 2x. +#define FFX_SDK_DEFAULT_CONTEXT_SIZE (1024 * 256) +// GODOT ENDS + +/// Maximum supported number of simultaneously bound SRVs. +/// +/// @ingroup Defines +#define FFX_MAX_NUM_SRVS 64 + +/// Maximum supported number of simultaneously bound UAVs. +/// +/// @ingroup Defines +#define FFX_MAX_NUM_UAVS 64 + +/// Maximum number of constant buffers bound.
+/// +/// @ingroup Defines +#define FFX_MAX_NUM_CONST_BUFFERS 3 + +/// Maximum number of characters in a resource name +/// +/// @ingroup Defines +#define FFX_RESOURCE_NAME_SIZE 64 + +/// Maximum number of queued frames in the backend +/// +/// @ingroup Defines +#define FFX_MAX_QUEUED_FRAMES (4) + +/// Maximum number of resources per effect context +/// +/// @ingroup Defines +#define FFX_MAX_RESOURCE_COUNT (512) + +/// Maximum number of passes per effect component +/// +/// @ingroup Defines +#define FFX_MAX_PASS_COUNT (50) + +/// Total number of descriptors in ring buffer needed for a single effect context +/// +/// @ingroup Defines +#define FFX_RING_BUFFER_DESCRIPTOR_COUNT (FFX_MAX_QUEUED_FRAMES * FFX_MAX_PASS_COUNT * FFX_MAX_RESOURCE_COUNT) + +/// Size of constant buffer entry in the ring buffer table +/// +/// @ingroup Defines +#define FFX_BUFFER_SIZE (4096) + +/// Total constant buffer ring buffer size for a single effect context +/// +/// @ingroup Defines +#define FFX_CONSTANT_BUFFER_RING_BUFFER_SIZE (FFX_MAX_QUEUED_FRAMES * FFX_MAX_PASS_COUNT * FFX_BUFFER_SIZE) + +/// Maximum number of barriers per flush +/// +/// @ingroup Defines +#define FFX_MAX_BARRIERS (128) + +/// Maximum number of GPU jobs per submission +/// +/// @ingroup Defines +#define FFX_MAX_GPU_JOBS (256) + +/// Maximum number of samplers supported +/// +/// @ingroup Defines +#define FFX_MAX_SAMPLERS (16) + +/// Maximum number of simultaneous upload jobs +/// +/// @ingroup Defines +#define UPLOAD_JOB_COUNT (16) + +// GODOT BEGINS + +// Disable warnings also for non-MSVC compilers +#if defined(_MSC_VER) && !defined(__clang__) +// Off by default warnings +#pragma warning(disable : 4365 4710 4820 5039) +#else +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wmissing-braces" +#pragma GCC diagnostic ignored "-Wswitch" +#endif + +// GODOT ENDS + +#ifdef __cplusplus +extern "C" { +#endif // #ifdef 
__cplusplus + +/// @defgroup CPUTypes CPU Types +/// CPU side type defines for all commonly used variables +/// +/// @ingroup ffxHost + +/// A typedef for version numbers returned from functions in the FidelityFX SDK. +/// +/// @ingroup CPUTypes + typedef uint32_t FfxVersionNumber; + +/// A typedef for a boolean value. +/// +/// @ingroup CPUTypes +typedef bool FfxBoolean; + +/// A typedef for an unsigned 8bit integer. +/// +/// @ingroup CPUTypes +typedef uint8_t FfxUInt8; + +/// A typedef for an unsigned 16bit integer. +/// +/// @ingroup CPUTypes +typedef uint16_t FfxUInt16; + +/// A typedef for an unsigned 32bit integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32; + +/// A typedef for an unsigned 64bit integer. +/// +/// @ingroup CPUTypes +typedef uint64_t FfxUInt64; + +/// A typedef for a signed 8bit integer. +/// +/// @ingroup CPUTypes +typedef int8_t FfxInt8; + +/// A typedef for a signed 16bit integer. +/// +/// @ingroup CPUTypes +typedef int16_t FfxInt16; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32; + +/// A typedef for a signed 64bit integer. +/// +/// @ingroup CPUTypes +typedef int64_t FfxInt64; + +/// A typedef for a floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2[2]; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3[3]; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4[4]; + +/// A typedef for a 4x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x4x4[16]; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit unsigned integer.
+/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes +typedef uint32_t FfxUInt32x4[4]; + +/// A typedef for a 2-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x4[4]; + +/// @defgroup SDKTypes SDK Types +/// Structure and enumeration definitions used by the FidelityFX SDK +/// +/// @ingroup ffxHost + + +/// An enumeration of surface formats. Needs to match enum FfxApiSurfaceFormat +/// +/// @ingroup SDKTypes +typedef enum FfxSurfaceFormat { + + FFX_SURFACE_FORMAT_UNKNOWN, ///< Unknown format + FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format + FFX_SURFACE_FORMAT_R32G32B32A32_UINT, ///< 32 bit per channel, 4 channel uint format + FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format + FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format + FFX_SURFACE_FORMAT_R32G32B32_FLOAT, ///< 32 bit per channel, 3 channel float format + FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format + FFX_SURFACE_FORMAT_R8_UINT, ///< 8 bit per channel, 1 channel float format + FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format + FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel typeless format + FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format + FFX_SURFACE_FORMAT_R8G8B8A8_SNORM, ///< 8 bit per channel, 4 channel signed normalized format + FFX_SURFACE_FORMAT_R8G8B8A8_SRGB, ///< 8 bit per channel, 4 channel srgb normalized + FFX_SURFACE_FORMAT_B8G8R8A8_TYPELESS, ///< 8 bit per channel, 4 
channel typeless format + FFX_SURFACE_FORMAT_B8G8R8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format + FFX_SURFACE_FORMAT_B8G8R8A8_SRGB, ///< 8 bit per channel, 4 channel srgb normalized + FFX_SURFACE_FORMAT_R11G11B10_FLOAT, ///< 32 bit 3 channel float format + FFX_SURFACE_FORMAT_R10G10B10A2_UNORM, ///< 10 bit per 3 channel, 2 bit for 1 channel normalized format + FFX_SURFACE_FORMAT_R16G16_FLOAT, ///< 16 bit per channel, 2 channel float format + FFX_SURFACE_FORMAT_R16G16_UINT, ///< 16 bit per channel, 2 channel unsigned int format + FFX_SURFACE_FORMAT_R16G16_SINT, ///< 16 bit per channel, 2 channel signed int format + FFX_SURFACE_FORMAT_R16_FLOAT, ///< 16 bit per channel, 1 channel float format + FFX_SURFACE_FORMAT_R16_UINT, ///< 16 bit per channel, 1 channel unsigned int format + FFX_SURFACE_FORMAT_R16_UNORM, ///< 16 bit per channel, 1 channel unsigned normalized format + FFX_SURFACE_FORMAT_R16_SNORM, ///< 16 bit per channel, 1 channel signed normalized format + FFX_SURFACE_FORMAT_R8_UNORM, ///< 8 bit per channel, 1 channel unsigned normalized format + FFX_SURFACE_FORMAT_R8G8_UNORM, ///< 8 bit per channel, 2 channel unsigned normalized format + FFX_SURFACE_FORMAT_R8G8_UINT, ///< 8 bit per channel, 2 channel unsigned integer format + FFX_SURFACE_FORMAT_R32_FLOAT, ///< 32 bit per channel, 1 channel float format + FFX_SURFACE_FORMAT_R9G9B9E5_SHAREDEXP, ///< 9 bit per channel, 5 bit exponent format + + FFX_SURFACE_FORMAT_R16G16B16A16_TYPELESS, ///< 16 bit per channel, 4 channel typeless format + FFX_SURFACE_FORMAT_R32G32_TYPELESS, ///< 32 bit per channel, 2 channel typeless format + FFX_SURFACE_FORMAT_R10G10B10A2_TYPELESS, ///< 10 bit per 3 channel, 2 bit for 1 channel typeless format + FFX_SURFACE_FORMAT_R16G16_TYPELESS, ///< 16 bit per channel, 2 channel typeless format + FFX_SURFACE_FORMAT_R16_TYPELESS, ///< 16 bit per channel, 1 channel typeless format + FFX_SURFACE_FORMAT_R8_TYPELESS, ///< 8 bit per channel, 1 channel typeless format + 
FFX_SURFACE_FORMAT_R8G8_TYPELESS, ///< 8 bit per channel, 2 channel typeless format + FFX_SURFACE_FORMAT_R32_TYPELESS, ///< 32 bit per channel, 1 channel typeless format +} FfxSurfaceFormat; + +typedef enum FfxIndexFormat +{ + FFX_INDEX_TYPE_UINT32, + FFX_INDEX_TYPE_UINT16 +} FfxIndexFormat; + +/// An enumeration of resource usage. +/// +/// @ingroup SDKTypes +typedef enum FfxResourceUsage { + + FFX_RESOURCE_USAGE_READ_ONLY = 0, ///< No usage flags indicate a resource is read only. + FFX_RESOURCE_USAGE_RENDERTARGET = (1<<0), ///< Indicates a resource will be used as render target. + FFX_RESOURCE_USAGE_UAV = (1<<1), ///< Indicates a resource will be used as UAV. + FFX_RESOURCE_USAGE_DEPTHTARGET = (1<<2), ///< Indicates a resource will be used as depth target. + FFX_RESOURCE_USAGE_INDIRECT = (1<<3), ///< Indicates a resource will be used as indirect argument buffer + FFX_RESOURCE_USAGE_ARRAYVIEW = (1<<4), ///< Indicates a resource that will generate array views. Works on 2D and cubemap textures + FFX_RESOURCE_USAGE_STENCILTARGET = (1<<5), ///< Indicates a resource will be used as stencil target. + FFX_RESOURCE_USAGE_DCC_RENDERTARGET = (1<<15), ///< Indicates a resource that should specify optimal render target memory access flags (for console use) +} FfxResourceUsage; + +/// An enumeration of resource states. +/// +/// @ingroup SDKTypes +typedef enum FfxResourceStates { + + FFX_RESOURCE_STATE_COMMON = (1 << 0), + FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1 << 1), ///< Indicates a resource is in the state to be used as UAV. + FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 2), ///< Indicates a resource is in the state to be read by compute shaders. + FFX_RESOURCE_STATE_PIXEL_READ = (1 << 3), ///< Indicates a resource is in the state to be read by pixel shaders. + FFX_RESOURCE_STATE_PIXEL_COMPUTE_READ = (FFX_RESOURCE_STATE_PIXEL_READ | FFX_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in the state to be read by pixel or compute shaders. 
+ FFX_RESOURCE_STATE_COPY_SRC = (1 << 4), ///< Indicates a resource is in the state to be used as source in a copy command. + FFX_RESOURCE_STATE_COPY_DEST = (1 << 5), ///< Indicates a resource is in the state to be used as destination in a copy command. + FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in generic (slow) read state. + FFX_RESOURCE_STATE_INDIRECT_ARGUMENT = (1 << 6), ///< Indicates a resource is in the state to be used as an indirect command argument + FFX_RESOURCE_STATE_PRESENT = (1 << 7), ///< Indicates a resource is in the state to be used to present to the swap chain + FFX_RESOURCE_STATE_RENDER_TARGET = (1 << 8), ///< Indicates a resource is in the state to be used as render target + FFX_RESOURCE_STATE_DEPTH_ATTACHEMENT = (1 << 9), ///< Indicates a resource is in the state to be used as depth attachment +} FfxResourceStates; + +/// An enumeration of surface dimensions. +/// +/// @ingroup SDKTypes +typedef enum FfxResourceDimension { + + FFX_RESOURCE_DIMENSION_TEXTURE_1D, ///< A resource with a single dimension. + FFX_RESOURCE_DIMENSION_TEXTURE_2D, ///< A resource with two dimensions. +} FfxResourceDimension; + +/// An enumeration of resource view dimensions. +/// +/// @ingroup SDKTypes +typedef enum FfxResourceViewDimension +{ + FFX_RESOURCE_VIEW_DIMENSION_BUFFER, ///< A resource view on a buffer. + FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_1D, ///< A resource view on a single dimension. + FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_1D_ARRAY, ///< A resource view on a single dimensional array. + FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_2D, ///< A resource view on two dimensions. + FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_2D_ARRAY, ///< A resource view on two dimensional array. + FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_3D, ///< A resource view on three dimensions. +} FfxResourceViewDimension; + +/// An enumeration of resource flags.
+/// +/// @ingroup SDKTypes +typedef enum FfxResourceFlags { + + FFX_RESOURCE_FLAGS_NONE = 0, ///< No flags. + FFX_RESOURCE_FLAGS_ALIASABLE = (1 << 0), ///< A bit indicating a resource does not need to persist across frames. + FFX_RESOURCE_FLAGS_UNDEFINED = (1 << 1), ///< Special case flag used internally when importing resources that require additional setup +} FfxResourceFlags; + +/// An enumeration of all resource view types. +/// +/// @ingroup SDKTypes +typedef enum FfxResourceViewType { + + FFX_RESOURCE_VIEW_UNORDERED_ACCESS, ///< The resource view is an unordered access view (UAV). + FFX_RESOURCE_VIEW_SHADER_READ, ///< The resource view is a shader resource view (SRV). +} FfxResourceViewType; + +/// The type of filtering to perform when reading a texture. +/// +/// @ingroup SDKTypes +typedef enum FfxFilterType { + + FFX_FILTER_TYPE_MINMAGMIP_POINT, ///< Point sampling. + FFX_FILTER_TYPE_MINMAGMIP_LINEAR, ///< Sampling with interpolation. + FFX_FILTER_TYPE_MINMAGLINEARMIP_POINT, ///< Use linear interpolation for minification and magnification; use point sampling for mip-level sampling. +} FfxFilterType; + +/// The address mode used when reading a texture. +/// +/// @ingroup SDKTypes +typedef enum FfxAddressMode { + + FFX_ADDRESS_MODE_WRAP, ///< Wrap when reading texture. + FFX_ADDRESS_MODE_MIRROR, ///< Mirror when reading texture. + FFX_ADDRESS_MODE_CLAMP, ///< Clamp when reading texture. + FFX_ADDRESS_MODE_BORDER, ///< Border color when reading texture. + FFX_ADDRESS_MODE_MIRROR_ONCE, ///< Mirror once when reading texture. +} FfxAddressMode; + +/// An enumeration of all supported shader models. +/// +/// @ingroup SDKTypes +typedef enum FfxShaderModel { + + FFX_SHADER_MODEL_5_1, ///< Shader model 5.1. + FFX_SHADER_MODEL_6_0, ///< Shader model 6.0. + FFX_SHADER_MODEL_6_1, ///< Shader model 6.1. + FFX_SHADER_MODEL_6_2, ///< Shader model 6.2. + FFX_SHADER_MODEL_6_3, ///< Shader model 6.3. + FFX_SHADER_MODEL_6_4, ///< Shader model 6.4. 
+ FFX_SHADER_MODEL_6_5, ///< Shader model 6.5. + FFX_SHADER_MODEL_6_6, ///< Shader model 6.6. + FFX_SHADER_MODEL_6_7, ///< Shader model 6.7. +} FfxShaderModel; + +/// An enumeration for different resource types +/// +/// @ingroup SDKTypes +typedef enum FfxResourceType { + + FFX_RESOURCE_TYPE_BUFFER, ///< The resource is a buffer. + FFX_RESOURCE_TYPE_TEXTURE1D, ///< The resource is a 1-dimensional texture. + FFX_RESOURCE_TYPE_TEXTURE2D, ///< The resource is a 2-dimensional texture. + FFX_RESOURCE_TYPE_TEXTURE_CUBE, ///< The resource is a cube map. + FFX_RESOURCE_TYPE_TEXTURE3D, ///< The resource is a 3-dimensional texture. +} FfxResourceType; + +/// An enumeration for different heap types +/// +/// @ingroup SDKTypes +typedef enum FfxHeapType { + + FFX_HEAP_TYPE_DEFAULT = 0, ///< Local memory. + FFX_HEAP_TYPE_UPLOAD, ///< Heap used for uploading resources. + FFX_HEAP_TYPE_READBACK ///< Heap used for reading back resources. +} FfxHeapType; + +/// An enumeration for different render job types +/// +/// @ingroup SDKTypes +typedef enum FfxGpuJobType { + + FFX_GPU_JOB_CLEAR_FLOAT = 0, ///< The GPU job is performing a floating-point clear. + FFX_GPU_JOB_COPY = 1, ///< The GPU job is performing a copy. + FFX_GPU_JOB_COMPUTE = 2, ///< The GPU job is performing a compute dispatch. + FFX_GPU_JOB_BARRIER = 3, ///< The GPU job is performing a barrier. + + FFX_GPU_JOB_DISCARD = 4, ///< The GPU job is performing a discard.
+ +} FfxGpuJobType; + +/// An enumeration for various descriptor types +/// +/// @ingroup SDKTypes +typedef enum FfxDescriptorType { + + //FFX_DESCRIPTOR_CBV = 0, // All CBVs currently mapped to root signature + //FFX_DESCRIPTOR_SAMPLER, // All samplers currently static + FFX_DESCRIPTOR_TEXTURE_SRV = 0, + FFX_DESCRIPTOR_BUFFER_SRV, + FFX_DESCRIPTOR_TEXTURE_UAV, + FFX_DESCRIPTOR_BUFFER_UAV, +} FfxDescriptiorType; + +/// An enumeration for view binding stages +/// +/// @ingroup SDKTypes +typedef enum FfxBindStage { + + FFX_BIND_PIXEL_SHADER_STAGE = 1 << 0, + FFX_BIND_VERTEX_SHADER_STAGE = 1 << 1, + FFX_BIND_COMPUTE_SHADER_STAGE = 1 << 2, + +} FfxBindStage; + +/// An enumeration for barrier types +/// +/// @ingroup SDKTypes +typedef enum FfxBarrierType +{ + FFX_BARRIER_TYPE_TRANSITION = 0, + FFX_BARRIER_TYPE_UAV, +} FfxBarrierType; + +typedef void (*ffxMessageCallback)(uint32_t type, const wchar_t* message); + +/// An enumeration for message types that can be passed +/// +/// @ingroup SDKTypes +typedef enum FfxMsgType { + FFX_MESSAGE_TYPE_ERROR = 0, + FFX_MESSAGE_TYPE_WARNING = 1, + FFX_MESSAGE_TYPE_COUNT +} FfxMsgType; + +/// An enumeration of all the effects which constitute the FidelityFX SDK. 
+/// +/// Dictates what effect shader blobs to fetch for pipeline creation +/// +/// @ingroup SDKTypes +typedef enum FfxEffect +{ + + FFX_EFFECT_FSR2 = 0, ///< FidelityFX Super Resolution v2 + FFX_EFFECT_FSR1, ///< FidelityFX Super Resolution + FFX_EFFECT_SPD, ///< FidelityFX Single Pass Downsampler + FFX_EFFECT_BLUR, ///< FidelityFX Blur + FFX_EFFECT_BREADCRUMBS, ///< FidelityFX Breadcrumbs + FFX_EFFECT_BRIXELIZER, ///< FidelityFX Brixelizer + FFX_EFFECT_BRIXELIZER_GI, ///< FidelityFX Brixelizer GI + FFX_EFFECT_CACAO, ///< FidelityFX Combined Adaptive Compute Ambient Occlusion + FFX_EFFECT_CAS, ///< FidelityFX Contrast Adaptive Sharpening + FFX_EFFECT_DENOISER, ///< FidelityFX Denoiser + FFX_EFFECT_LENS, ///< FidelityFX Lens + FFX_EFFECT_PARALLEL_SORT, ///< FidelityFX Parallel Sort + FFX_EFFECT_SSSR, ///< FidelityFX Stochastic Screen Space Reflections + FFX_EFFECT_VARIABLE_SHADING, ///< FidelityFX Variable Shading + FFX_EFFECT_LPM, ///< FidelityFX Luma Preserving Mapper + FFX_EFFECT_DOF, ///< FidelityFX Depth of Field + FFX_EFFECT_CLASSIFIER, ///< FidelityFX Classifier + FFX_EFFECT_FSR3UPSCALER, ///< FidelityFX Super Resolution v3 + FFX_EFFECT_FRAMEINTERPOLATION, ///< FidelityFX Frame Interpolation, part of FidelityFX Super Resolution v3 + FFX_EFFECT_OPTICALFLOW, ///< FidelityFX Optical Flow, part of FidelityFX Super Resolution v3 + + FFX_EFFECT_SHAREDRESOURCES = 127, ///< FidelityFX Shared resources effect ID + FFX_EFFECT_SHAREDAPIBACKEND = 128 ///< FidelityFX Shared backend context used with DLL API +} FfxEffect; + +typedef enum FfxBackbufferTransferFunction { + FFX_BACKBUFFER_TRANSFER_FUNCTION_SRGB, + FFX_BACKBUFFER_TRANSFER_FUNCTION_PQ, + FFX_BACKBUFFER_TRANSFER_FUNCTION_SCRGB +} FfxBackbufferTransferFunction; + +/// A typedef representing the graphics device. +/// +/// @ingroup SDKTypes +typedef void* FfxDevice; + +typedef void* FfxCommandQueue; + +typedef void* FfxSwapchain; + +/// A typedef representing a command list or command buffer. 
+/// +/// @ingroup SDKTypes +typedef void* FfxCommandList; + +/// A typedef for a root signature. +/// +/// @ingroup SDKTypes +typedef void* FfxRootSignature; + +/// A typedef for a command signature, used for indirect workloads +/// +/// @ingroup SDKTypes +typedef void* FfxCommandSignature; + +/// A typedef for a pipeline state object. +/// +/// @ingroup SDKTypes +typedef void* FfxPipeline; + +/// Allocate block of memory. +/// +/// The callback function for requesting memory of provided size. +/// size cannot be 0. +/// +/// @param [in] size Size in bytes of memory to allocate. +/// +/// @retval +/// NULL The operation failed. +/// @retval +/// Anything else The operation completed successfully. +/// +/// @ingroup SDKTypes +typedef void* (*FfxAllocFunc)( + size_t size); + +/// Reallocate block of memory. +/// +/// The callback function for reallocating provided block of memory to new location +/// with specified size. When provided with NULL as ptr +/// then it should behave as FfxAllocFunc. +/// If the operation failed then contents of ptr +/// cannot be changed. size cannot be 0. +/// +/// @param [in] ptr A pointer to previous block of memory. +/// @param [in] size Size in bytes of memory to allocate. +/// +/// @retval +/// NULL The operation failed. +/// @retval +/// Anything else The operation completed successfully. +/// +/// @ingroup SDKTypes +typedef void* (*FfxReallocFunc)( + void* ptr, + size_t size); + +/// Free block of memory. +/// +/// The callback function for freeing provided block of memory. +/// ptr cannot be NULL. +/// +/// @param [in] ptr A pointer to block of memory. +/// +/// @ingroup SDKTypes +typedef void (*FfxFreeFunc)( + void* ptr); + +/// A structure encapsulating a set of allocation callbacks. +/// +/// @ingroup SDKTypes +typedef struct FfxAllocationCallbacks { + + FfxAllocFunc fpAlloc; ///< Callback for allocating memory in the library. + FfxReallocFunc fpRealloc; ///< Callback for reallocating memory in the library.
+ FfxFreeFunc fpFree; ///< Callback for freeing allocated memory in the library. +} FfxAllocationCallbacks; + +/// A structure encapsulating the bindless descriptor configuration of an effect. +/// +/// @ingroup SDKTypes +typedef struct FfxEffectBindlessConfig { + uint32_t maxTextureSrvs; ///< Maximum number of texture SRVs needed in the bindless table. + uint32_t maxBufferSrvs; ///< Maximum number of buffer SRVs needed in the bindless table. + uint32_t maxTextureUavs; ///< Maximum number of texture UAVs needed in the bindless table. + uint32_t maxBufferUavs; ///< Maximum number of buffer UAVs needed in the bindless table. +} FfxEffectBindlessConfig; + +/// A structure encapsulating a collection of device capabilities. +/// +/// @ingroup SDKTypes +typedef struct FfxDeviceCapabilities { + + FfxShaderModel maximumSupportedShaderModel; ///< The maximum shader model supported by the device. + uint32_t waveLaneCountMin; ///< The minimum supported wavefront width. + uint32_t waveLaneCountMax; ///< The maximum supported wavefront width. + bool fp16Supported; ///< The device supports FP16 in hardware. + bool raytracingSupported; ///< The device supports ray tracing. + bool deviceCoherentMemorySupported; ///< The device supports AMD coherent memory. + bool dedicatedAllocationSupported; ///< The device supports dedicated allocations for resources. + bool bufferMarkerSupported; ///< The device supports AMD buffer markers. + bool extendedSynchronizationSupported; ///< The device supports extended synchronization mechanism. + bool shaderStorageBufferArrayNonUniformIndexing; ///< The device supports shader storage buffer array non uniform indexing. +} FfxDeviceCapabilities; + +/// A structure encapsulating a 2-dimensional point, using 32bit unsigned integers. +/// +/// @ingroup SDKTypes +typedef struct FfxDimensions2D { + + uint32_t width; ///< The width of a 2-dimensional range. + uint32_t height; ///< The height of a 2-dimensional range. 
+} FfxDimensions2D; + +/// A structure encapsulating a 2-dimensional point. +/// +/// @ingroup SDKTypes +typedef struct FfxIntCoords2D { + + int32_t x; ///< The x coordinate of a 2-dimensional point. + int32_t y; ///< The y coordinate of a 2-dimensional point. +} FfxIntCoords2D; + +/// A structure encapsulating a 2-dimensional rect. +/// +/// @ingroup SDKTypes +typedef struct FfxRect2D +{ + int32_t left; ///< Left most coordinate + int32_t top; ///< Top most coordinate + int32_t width; ///< Rect width + int32_t height; ///< Rect height +} FfxRect2D; + +/// A structure encapsulating a 2-dimensional set of floating point coordinates. +/// +/// @ingroup SDKTypes +typedef struct FfxFloatCoords2D { + + float x; ///< The x coordinate of a 2-dimensional point. + float y; ///< The y coordinate of a 2-dimensional point. +} FfxFloatCoords2D; + +/// A structure describing a resource. +/// +/// @ingroup SDKTypes +typedef struct FfxResourceDescription { + + FfxResourceType type; ///< The type of the resource. + FfxSurfaceFormat format; ///< The surface format. + union { + uint32_t width; ///< The width of the texture resource. + uint32_t size; ///< The size of the buffer resource. + }; + + union { + uint32_t height; ///< The height of the texture resource. + uint32_t stride; ///< The stride of the buffer resource. + }; + + union { + uint32_t depth; ///< The depth of the texture resource. + uint32_t alignment; ///< The alignment of the buffer resource. + }; + + uint32_t mipCount; ///< Number of mips (or 0 for full mipchain). + FfxResourceFlags flags; ///< A set of FfxResourceFlags flags. + FfxResourceUsage usage; ///< Resource usage flags. +} FfxResourceDescription; + +/// An outward facing structure containing a resource +/// +/// @ingroup SDKTypes +typedef struct FfxResource { + void* resource; ///< pointer to the resource. + FfxResourceDescription description; + FfxResourceStates state; + wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< (optional) Resource name. 
+} FfxResource; + +/// A structure describing a static resource. +/// +/// @ingroup SDKTypes +typedef struct FfxStaticResourceDescription +{ + const FfxResource* resource; ///< The resource to register. + FfxDescriptorType descriptorType; ///< The type of descriptor to create. + uint32_t descriptorIndex; ///< The destination index of the descriptor within the static table. + + union + { + uint32_t bufferOffset; ///< The buffer offset in bytes. + uint32_t textureUavMip; ///< The mip of the texture resource to create a UAV for. + }; + + uint32_t bufferSize; ///< The buffer size in bytes. + uint32_t bufferStride; ///< The buffer stride in bytes. +} FfxStaticResourceDescription; + +/// A structure describing a constant buffer allocation. +/// +/// @ingroup SDKTypes +typedef struct FfxConstantAllocation +{ + FfxResource resource; ///< The resource representing the constant buffer resource. + FfxUInt64 handle; ///< The binding handle for the constant buffer + +} FfxRootConstantAllocation; + +/// A function definition for a constant buffer allocation callback +/// +/// Used to provide a constant buffer allocator to the calling backend +/// +/// @param [in] data The constant buffer data. +/// @param [in] dataSize The size of the constant buffer data. +/// +/// +/// @ingroup SDKTypes +typedef FfxConstantAllocation(*FfxConstantBufferAllocator)( + void* data, + const FfxUInt64 dataSize); + +/// Information about single AMD FidelityFX Breadcrumbs Library GPU memory block. +/// +/// @ingroup SDKTypes +typedef struct FfxBreadcrumbsBlockData { + void* memory; ///< Pointer to CPU mapped GPU buffer memory. + void* heap; ///< GPU memory block handle. + void* buffer; ///< GPU buffer handle for memory block. + uint64_t baseAddress; ///< GPU address of memory block. + uint32_t nextMarker; ///< Index of next marker to be saved in memory block. 
+} FfxBreadcrumbsBlockData; + +/// An internal structure containing a handle to a resource and resource views +/// +/// @ingroup SDKTypes +typedef struct FfxResourceInternal { + int32_t internalIndex; ///< The index of the resource. +} FfxResourceInternal; + +/// An enumeration for resource init data types that can be passed +/// +/// @ingroup SDKTypes +typedef enum FfxResourceInitDataType { + FFX_RESOURCE_INIT_DATA_TYPE_INVALID = 0, + FFX_RESOURCE_INIT_DATA_TYPE_UNINITIALIZED, + FFX_RESOURCE_INIT_DATA_TYPE_BUFFER, + FFX_RESOURCE_INIT_DATA_TYPE_VALUE, +} FfxResourceInitDataType; + +/// A structure housing all that is needed for resource initialization +/// +/// @ingroup SDKTypes +typedef struct FfxResourceInitData +{ + FfxResourceInitDataType type; ///< Indicates that the resource will be initialized from a buffer or a value, or stay uninitialized. + size_t size; ///< The size, in bytes, of the resource that needs to be initialized. + union + { + void* buffer; ///< The buffer used to initialize the resource. + unsigned char value; ///< Indicates that the resource will be filled up with this value.
+ }; + + static FfxResourceInitData FfxResourceInitValue(size_t dataSize, uint8_t initVal) + { + FfxResourceInitData initData = { FFX_RESOURCE_INIT_DATA_TYPE_VALUE }; + initData.size = dataSize; + initData.value = initVal; + return initData; + } + + static FfxResourceInitData FfxResourceInitBuffer(size_t dataSize, void* pInitData) + { + FfxResourceInitData initData = { FFX_RESOURCE_INIT_DATA_TYPE_BUFFER }; + initData.size = dataSize; + initData.buffer = pInitData; + return initData; + } + +} FfxResourceInitData; + +/// An internal structure housing all that is needed for backend resource descriptions +/// +/// @ingroup SDKTypes +typedef struct FfxInternalResourceDescription { + + uint32_t id; ///< Resource identifier + const wchar_t* name; ///< Name to set to the resource for easier debugging + FfxResourceType type; ///< The type of resource (see FfxResourceType) + FfxResourceUsage usage; ///< Resource usage flags (see FfxResourceUsage) + FfxSurfaceFormat format; ///< The resource format to use + uint32_t width; ///< The width (textures) or size (buffers) of the resource + uint32_t height; ///< The height (textures) or stride (buffers) of the resource + uint32_t mipCount; ///< Mip count (textures) of the resource + FfxResourceFlags flags; ///< Resource flags (see FfxResourceFlags) + FfxResourceInitData initData; ///< Resource initialization definition (see FfxResourceInitData) +} FfxInternalResourceDescription; + +/// A structure defining the view to create +/// +/// @ingroup SDKTypes +typedef struct FfxViewDescription +{ + bool uavView; ///< Indicates that the view is a UAV. 
+ FfxResourceViewDimension viewDimension; ///< The view dimension to map + union { + int32_t mipLevel; ///< The mip level of the view, (-1) for default + int32_t firstElement; ///< The first element of a buffer view, (-1) for default + }; + + union { + int32_t arraySize; ///< The array size of the view, (-1) for full depth/array size + int32_t elementCount; ///< The number of elements in a buffer view, (-1) for full depth/array size + }; + + int32_t firstSlice; ///< The first slice to map to, (-1) for default first slice + wchar_t name[FFX_RESOURCE_NAME_SIZE]; +} FfxViewDescription; + +static FfxViewDescription s_FfxViewDescInit = { false, FFX_RESOURCE_VIEW_DIMENSION_TEXTURE_2D, -1, -1, -1, L"" }; + +/// A structure defining a resource bind point +/// +/// @ingroup SDKTypes +typedef struct FfxResourceBinding +{ + uint32_t slotIndex; ///< The slot into which to bind the resource + uint32_t arrayIndex; ///< The resource offset for mip/array access + uint32_t resourceIdentifier; ///< A unique resource identifier representing an internal resource index + wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< A debug name to help track the resource binding +}FfxResourceBinding; + +/// A structure encapsulating a single pass of an algorithm. 
+/// +/// @ingroup SDKTypes +typedef struct FfxPipelineState { + + FfxRootSignature rootSignature; ///< The pipelines rootSignature + uint32_t passId; ///< The id of the effect pass this pipeline corresponds to + FfxCommandSignature cmdSignature; ///< The command signature used for indirect workloads + FfxPipeline pipeline; ///< The pipeline object + uint32_t uavTextureCount; ///< Count of Texture UAVs used in this pipeline + uint32_t srvTextureCount; ///< Count of Texture SRVs used in this pipeline + uint32_t srvBufferCount; ///< Count of Buffer SRV used in this pipeline + uint32_t uavBufferCount; ///< Count of Buffer UAVs used in this pipeline + uint32_t staticTextureSrvCount; ///< Count of static Texture SRVs used in this pipeline + uint32_t staticBufferSrvCount; ///< Count of static Buffer SRVs used in this pipeline + uint32_t staticTextureUavCount; ///< Count of static Texture UAVs used in this pipeline + uint32_t staticBufferUavCount; ///< Count of static Buffer UAVs used in this pipeline + uint32_t constCount; ///< Count of constant buffers used in this pipeline + + FfxResourceBinding uavTextureBindings[FFX_MAX_NUM_UAVS]; ///< Array of ResourceIdentifiers bound as texture UAVs + FfxResourceBinding srvTextureBindings[FFX_MAX_NUM_SRVS]; ///< Array of ResourceIdentifiers bound as texture SRVs + FfxResourceBinding srvBufferBindings[FFX_MAX_NUM_SRVS]; ///< Array of ResourceIdentifiers bound as buffer SRVs + FfxResourceBinding uavBufferBindings[FFX_MAX_NUM_UAVS]; ///< Array of ResourceIdentifiers bound as buffer UAVs + FfxResourceBinding constantBufferBindings[FFX_MAX_NUM_CONST_BUFFERS]; ///< Array of ResourceIdentifiers bound as CBs + + wchar_t name[FFX_RESOURCE_NAME_SIZE]; ///< Pipeline name for debugging/profiling purposes +} FfxPipelineState; + +/// A structure containing the data required to create a resource. 
+/// +/// @ingroup SDKTypes +typedef struct FfxCreateResourceDescription { + + FfxHeapType heapType; ///< The heap type to hold the resource, typically FFX_HEAP_TYPE_DEFAULT. + FfxResourceDescription resourceDescription; ///< A resource description. + FfxResourceStates initialState; ///< The initial resource state. + const wchar_t* name; ///< Name of the resource. + uint32_t id; ///< Internal resource ID. + FfxResourceInitData initData; ///< A struct used to initialize the resource. +} FfxCreateResourceDescription; + +/// A structure containing the data required to create sampler mappings +/// +/// @ingroup SDKTypes +typedef struct FfxSamplerDescription { + + FfxFilterType filter; + FfxAddressMode addressModeU; + FfxAddressMode addressModeV; + FfxAddressMode addressModeW; + FfxBindStage stage; +} FfxSamplerDescription; + +/// A structure containing the data required to create root constant buffer mappings +/// +/// @ingroup SDKTypes +typedef struct FfxRootConstantDescription +{ + uint32_t size; + FfxBindStage stage; +} FfxRootConstantDescription; + +/// A structure containing the description used to create a +/// FfxPipeline structure. +/// +/// A pipeline is the name given to a shader and the collection of state that +/// is required to dispatch it. In the context of the FidelityFX SDK and its architecture +/// this means that a FfxPipelineDescription will map to either a +/// monolithic object in an explicit API (such as a +/// PipelineStateObject in DirectX 12). Or a shader and some +/// ancillary API objects (in something like DirectX 11). +/// +/// The contextFlags field contains a copy of the flags passed +/// to ffxContextCreate via the flags field of +/// the FfxInitializationParams structure. These flags are +/// used to determine which permutation of a pipeline for a specific +/// FfxPass should be used to implement the features required +/// by each application, as well as to achieve the best performance on specific +/// target hardware configurations. 
+/// +/// When using one of the provided backends for FidelityFX SDK (such as DirectX 12 or +/// Vulkan) the data required to create a pipeline is compiled off line and +/// included into the backend library that you are using. For cases where the +/// backend interface is overridden by providing custom callback function +/// implementations care should be taken to respect the contents of the +/// contextFlags field in order to correctly support the options +/// provided by the FidelityFX SDK, and achieve best performance. +/// /// +/// @ingroup SDKTypes +typedef struct FfxPipelineDescription { + + uint32_t contextFlags; ///< A collection of FfxInitializationFlagBits which were passed to the context. + const FfxSamplerDescription* samplers; ///< A collection of samplers to use when building the root signature for the pipeline + size_t samplerCount; ///< Number of samplers to create for the pipeline + const FfxRootConstantDescription* rootConstants; ///< A collection of root constant descriptions to use when building the root signature for the pipeline + uint32_t rootConstantBufferCount; ///< Number of root constant buffers to create for the pipeline + wchar_t name[64]; ///< Pipeline name with which to name the pipeline object + FfxBindStage stage; ///< The stage(s) for which this pipeline is being built + uint32_t indirectWorkload; ///< Whether this pipeline has an indirect workload + FfxSurfaceFormat backbufferFormat; ///< For raster pipelines this contains the backbuffer format +} FfxPipelineDescription; + +/// A structure containing the data required to create a barrier +/// +/// @ingroup SDKTypes +typedef struct FfxBarrierDescription +{ + FfxResourceInternal resource; ///< The resource representation + FfxBarrierType barrierType; ///< The type of barrier to execute + FfxResourceStates currentState; ///< The initial state of the resource + FfxResourceStates newState; ///< The new state of the resource after barrier + uint32_t subResourceID; ///< The subresource 
id to apply barrier operation to +} FfxBarrierDescription; + + +/// A structure containing a constant buffer. +/// +/// @ingroup SDKTypes +typedef struct FfxConstantBuffer { + + uint32_t num32BitEntries; ///< The size (expressed in 32-bit chunks) stored in data. + uint32_t* data; ///< Pointer to constant buffer data +}FfxConstantBuffer; + +/// A structure containing a shader resource view. +typedef struct FfxTextureSRV +{ + FfxResourceInternal resource; ///< Resource corresponding to the shader resource view. +#ifdef FFX_DEBUG + wchar_t name[FFX_RESOURCE_NAME_SIZE]; +#endif +} FfxTextureSRV; + +/// A structure containing a shader resource view. +typedef struct FfxBufferSRV +{ + uint32_t offset; ///< Offset of resource to bind in bytes. + uint32_t size; ///< Size of resource to bind in bytes. + uint32_t stride; ///< Stride of resource to bind in bytes. + FfxResourceInternal resource; ///< Resource corresponding to the shader resource view. +#ifdef FFX_DEBUG + wchar_t name[FFX_RESOURCE_NAME_SIZE]; +#endif +} FfxBufferSRV; + +/// A structure containing an unordered access view. +typedef struct FfxTextureUAV +{ + uint32_t mip; ///< Mip level of resource to bind. + FfxResourceInternal resource; ///< Resource corresponding to the unordered access view. +#ifdef FFX_DEBUG + wchar_t name[FFX_RESOURCE_NAME_SIZE]; +#endif +} FfxTextureUAV; + +/// A structure containing an unordered access view. +typedef struct FfxBufferUAV +{ + uint32_t offset; ///< Offset of resource to bind in bytes. + uint32_t size; ///< Size of resource to bind in bytes. + uint32_t stride; ///< Stride of resource to bind in bytes. + FfxResourceInternal resource; ///< Resource corresponding to the unordered access view. +#ifdef FFX_DEBUG + wchar_t name[FFX_RESOURCE_NAME_SIZE]; +#endif +} FfxBufferUAV; + +/// A structure describing a clear render job. +/// +/// @ingroup SDKTypes +typedef struct FfxClearFloatJobDescription { + + float color[4]; ///< The clear color of the resource.
+ FfxResourceInternal target; ///< The resource to be cleared. +} FfxClearFloatJobDescription; + +/// A structure describing a compute render job. +/// +/// @ingroup SDKTypes +typedef struct FfxComputeJobDescription { + + FfxPipelineState pipeline; ///< Compute pipeline for the render job. + uint32_t dimensions[3]; ///< Dispatch dimensions. + FfxResourceInternal cmdArgument; ///< Dispatch indirect cmd argument buffer + uint32_t cmdArgumentOffset; ///< Dispatch indirect offset within the cmd argument buffer + FfxTextureSRV srvTextures[FFX_MAX_NUM_SRVS]; ///< SRV texture resources to be bound in the compute job. + FfxBufferSRV srvBuffers[FFX_MAX_NUM_SRVS]; ///< SRV buffer resources to be bound in the compute job. + FfxTextureUAV uavTextures[FFX_MAX_NUM_UAVS]; ///< UAV texture resources to be bound in the compute job. + FfxBufferUAV uavBuffers[FFX_MAX_NUM_UAVS]; ///< UAV buffer resources to be bound in the compute job. + + FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the compute job. +#ifdef FFX_DEBUG + wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][FFX_RESOURCE_NAME_SIZE]; +#endif +} FfxComputeJobDescription; + +typedef struct FfxRasterJobDescription +{ + FfxPipelineState pipeline; ///< Raster pipeline for the render job. + uint32_t numVertices; + FfxResourceInternal renderTarget; + FfxTextureSRV srvTextures[FFX_MAX_NUM_SRVS]; ///< SRV texture resources to be bound in the raster job. + FfxTextureUAV uavTextures[FFX_MAX_NUM_UAVS]; ///< UAV texture resources to be bound in the raster job. + + FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the raster job. +#ifdef FFX_DEBUG + wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][FFX_RESOURCE_NAME_SIZE]; +#endif +} FfxRasterJobDescription; + +/// A structure describing a copy render job. +/// +/// @ingroup SDKTypes +typedef struct FfxCopyJobDescription +{ + FfxResourceInternal src; ///< Source resource for the copy.
+ uint32_t srcOffset; ///< Offset into the source buffer in bytes. + FfxResourceInternal dst; ///< Destination resource for the copy. + uint32_t dstOffset; ///< Offset into the destination buffer in bytes. + uint32_t size; ///< Number of bytes to copy (Set to 0 to copy entire buffer). +} FfxCopyJobDescription; + +typedef struct FfxDiscardJobDescription { + + FfxResourceInternal target; ///< The resource to be discarded. +} FfxDiscardJobDescription; + +/// A structure describing a single render job. +/// +/// @ingroup SDKTypes +typedef struct FfxGpuJobDescription{ + + FfxGpuJobType jobType; ///< Type of the job. + wchar_t jobLabel[FFX_RESOURCE_NAME_SIZE]; ///< Job label for markers + + union { + FfxClearFloatJobDescription clearJobDescriptor; ///< Clear job descriptor. Valid when jobType is FFX_GPU_JOB_CLEAR_FLOAT. + FfxCopyJobDescription copyJobDescriptor; ///< Copy job descriptor. Valid when jobType is FFX_GPU_JOB_COPY. + FfxComputeJobDescription computeJobDescriptor; ///< Compute job descriptor. Valid when jobType is FFX_GPU_JOB_COMPUTE.
+ FfxRasterJobDescription rasterJobDescriptor; + FfxBarrierDescription barrierDescriptor; + FfxDiscardJobDescription discardJobDescriptor; + }; +} FfxGpuJobDescription; + +#if defined(POPULATE_SHADER_BLOB_FFX) +#undef POPULATE_SHADER_BLOB_FFX +#endif // #if defined(POPULATE_SHADER_BLOB_FFX) + +/// Macro definition to copy header shader blob information into its SDK structural representation +/// +/// @ingroup SDKTypes +#define POPULATE_SHADER_BLOB_FFX(info, index) \ + { \ + info[index].blobData, \ + info[index].blobSize, \ + info[index].numConstantBuffers, \ + info[index].numSRVTextures, \ + info[index].numUAVTextures, \ + info[index].numSRVBuffers, \ + info[index].numUAVBuffers, \ + info[index].numSamplers, \ + info[index].numRTAccelerationStructures, \ + info[index].constantBufferNames, \ + info[index].constantBufferBindings, \ + info[index].constantBufferCounts, \ + info[index].constantBufferSpaces, \ + info[index].srvTextureNames, \ + info[index].srvTextureBindings, \ + info[index].srvTextureCounts, \ + info[index].srvTextureSpaces, \ + info[index].uavTextureNames, \ + info[index].uavTextureBindings, \ + info[index].uavTextureCounts, \ + info[index].uavTextureSpaces, \ + info[index].srvBufferNames, \ + info[index].srvBufferBindings, \ + info[index].srvBufferCounts, \ + info[index].srvBufferSpaces, \ + info[index].uavBufferNames, \ + info[index].uavBufferBindings, \ + info[index].uavBufferCounts, \ + info[index].uavBufferSpaces, \ + info[index].samplerNames, \ + info[index].samplerBindings, \ + info[index].samplerCounts, \ + info[index].samplerSpaces, \ + info[index].rtAccelerationStructureNames, \ + info[index].rtAccelerationStructureBindings, \ + info[index].rtAccelerationStructureCounts, \ + info[index].rtAccelerationStructureSpaces \ + } + +/// A single shader blob and a description of its resources. +/// +/// @ingroup SDKTypes +typedef struct FfxShaderBlob { + + const uint8_t* data; ///< A pointer to the blob + const uint32_t size; ///< Size in bytes. 
+ + const uint32_t cbvCount; ///< Number of CBs. + const uint32_t srvTextureCount; ///< Number of SRV Textures. + const uint32_t uavTextureCount; ///< Number of UAV Textures. + const uint32_t srvBufferCount; ///< Number of SRV Buffers. + const uint32_t uavBufferCount; ///< Number of UAV Buffers. + const uint32_t samplerCount; ///< Number of Samplers. + const uint32_t rtAccelStructCount; ///< Number of RT Acceleration structures. + + // constant buffers + const char** boundConstantBufferNames; + const uint32_t* boundConstantBuffers; ///< Pointer to an array of bound ConstantBuffers. + const uint32_t* boundConstantBufferCounts; ///< Pointer to an array of bound ConstantBuffer resource counts + const uint32_t* boundConstantBufferSpaces; ///< Pointer to an array of bound ConstantBuffer resource spaces + + // srv textures + const char** boundSRVTextureNames; + const uint32_t* boundSRVTextures; ///< Pointer to an array of bound SRV resources. + const uint32_t* boundSRVTextureCounts; ///< Pointer to an array of bound SRV resource counts + const uint32_t* boundSRVTextureSpaces; ///< Pointer to an array of bound SRV resource spaces + + // uav textures + const char** boundUAVTextureNames; + const uint32_t* boundUAVTextures; ///< Pointer to an array of bound UAV texture resources. + const uint32_t* boundUAVTextureCounts; ///< Pointer to an array of bound UAV texture resource counts + const uint32_t* boundUAVTextureSpaces; ///< Pointer to an array of bound UAV texture resource spaces + + // srv buffers + const char** boundSRVBufferNames; + const uint32_t* boundSRVBuffers; ///< Pointer to an array of bound SRV buffer resources. + const uint32_t* boundSRVBufferCounts; ///< Pointer to an array of bound SRV buffer resource counts + const uint32_t* boundSRVBufferSpaces; ///< Pointer to an array of bound SRV buffer resource spaces + + // uav buffers + const char** boundUAVBufferNames; + const uint32_t* boundUAVBuffers; ///< Pointer to an array of bound UAV buffer resources. 
+ const uint32_t* boundUAVBufferCounts; ///< Pointer to an array of bound UAV buffer resource counts + const uint32_t* boundUAVBufferSpaces; ///< Pointer to an array of bound UAV buffer resource spaces + + // samplers + const char** boundSamplerNames; + const uint32_t* boundSamplers; ///< Pointer to an array of bound sampler resources. + const uint32_t* boundSamplerCounts; ///< Pointer to an array of bound sampler resource counts + const uint32_t* boundSamplerSpaces; ///< Pointer to an array of bound sampler resource spaces + + // rt acceleration structures + const char** boundRTAccelerationStructureNames; + const uint32_t* boundRTAccelerationStructures; ///< Pointer to an array of bound UAV buffer resources. + const uint32_t* boundRTAccelerationStructureCounts; ///< Pointer to an array of bound UAV buffer resource counts + const uint32_t* boundRTAccelerationStructureSpaces; ///< Pointer to an array of bound UAV buffer resource spaces + +} FfxShaderBlob; + +/// A structure describing the parameters passed from the +/// presentation thread to the ui composition callback function. +/// +/// @ingroup SDKTypes +typedef struct FfxPresentCallbackDescription +{ + FfxDevice device; ///< The active device + FfxCommandList commandList; ///< The command list on which to register render commands + FfxResource currentBackBuffer; ///< The backbuffer resource with scene information + FfxResource currentUI; ///< Optional UI texture (when doing backbuffer + ui blend) + FfxResource outputSwapChainBuffer; ///< The swapchain target into which to render ui composition + bool isInterpolatedFrame; ///< Whether this is an interpolated or real frame + bool usePremulAlpha; ///< Toggles whether UI gets premultiplied alpha blending or not + uint64_t frameID; +} FfxPresentCallbackDescription; + +/// A structure describing the parameters to pass to frame generation passes. 
+/// +/// @ingroup SDKTypes +typedef struct FfxFrameGenerationDispatchDescription { + FfxCommandList commandList; ///< The command list on which to register render commands + FfxResource presentColor; ///< The current presentation color, this will be used as interpolation source data. + FfxResource outputs[4]; ///< Interpolation destination targets (1 for each frame in numInterpolatedFrames) + uint32_t numInterpolatedFrames; ///< The number of frames to interpolate from the passed in color target + bool reset; ///< A boolean value which when set to true, indicates the camera has moved discontinuously. + FfxBackbufferTransferFunction backBufferTransferFunction; ///< The transfer function use to convert interpolation source color data to linear RGB. + float minMaxLuminance[2]; ///< Min and max luminance values, used when converting HDR colors to linear RGB + FfxRect2D interpolationRect; ///< The area of the backbuffer that should be used for interpolation in case only a part of the screen is used e.g. due to movie bars + uint64_t frameID; +} FfxFrameGenerationDispatchDescription; + +//struct definition matches FfxApiEffectMemoryUsage +typedef struct FfxEffectMemoryUsage +{ + uint64_t totalUsageInBytes; + uint64_t aliasableUsageInBytes; +} FfxEffectMemoryUsage; + +//struct definition matches FfxApiSwapchainFramePacingTuning +typedef struct FfxSwapchainFramePacingTuning +{ + float safetyMarginInMs; // in Millisecond + float varianceFactor; // valid range [0.0,1.0] + bool allowHybridSpin; //Allows pacing spinlock to sleep. + uint32_t hybridSpinTime; //How long to spin when hybridSpin is enabled. Measured in timer resolution units. Not recommended to go below 2. Will result in frequent overshoots. + bool allowWaitForSingleObjectOnFence; //Allows to call WaitForSingleObject() instead of spinning for fence value. 
+} FfxSwapchainFramePacingTuning; + +#ifdef __cplusplus +} +#endif // #ifdef __cplusplus diff --git a/thirdparty/amd-ffx/ffx_util.h b/thirdparty/amd-ffx/ffx_util.h new file mode 100644 index 000000000000..b9531b019a52 --- /dev/null +++ b/thirdparty/amd-ffx/ffx_util.h @@ -0,0 +1,190 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#pragma once + +#include "ffx_types.h" + +/// @defgroup Utils Utilities +/// Utility Macros used by the FidelityFX SDK +/// +/// @ingroup ffxHost + +/// The value of Pi. +/// +/// @ingroup Utils +const float FFX_PI = 3.141592653589793f; + +/// An epsilon value for floating point numbers. +/// +/// @ingroup Utils +const float FFX_EPSILON = 1e-06f; + +/// Helper macro to create the version number. 
///
/// Packs the three components into a single integer: `major` is shifted left
/// by 22 bits, `minor` by 12 bits, and `patch` is OR-ed in unshifted. No
/// masking is applied, so an out-of-range component spills into its neighbour.
/// Every argument is parenthesized so compound expressions expand correctly.
///
/// @ingroup Utils
#define FFX_MAKE_VERSION(major, minor, patch) (((major) << 22) | ((minor) << 12) | (patch))

/// Use this to specify no version.
///
/// @ingroup Utils
#define FFX_UNSPECIFIED_VERSION 0xFFFFAD00

/// Helper macro to avoid warnings about unused variables.
///
/// @ingroup Utils
#define FFX_UNUSED(x) ((void)(x))

/// Helper macro to align an integer up to the specified power of 2 boundary.
///
/// `y` must be a power of two: the result is `(x + y - 1)` with the low bits
/// covered by `y - 1` cleared.
///
/// @ingroup Utils
#define FFX_ALIGN_UP(x, y) (((x) + ((y)-1)) & ~((y)-1))

/// Helper macro to check if a value is aligned.
///
/// As written, this is true when `x` is non-zero AND `x & (x - 1)` is
/// non-zero, i.e. when `x` has more than one bit set. It is false for zero and
/// for exact powers of two.
/// NOTE(review): this looks inverted relative to the macro's name -- confirm
/// the intended semantics with callers before changing it. The original
/// behaviour is deliberately preserved here.
///
/// @ingroup Utils
#define FFX_IS_ALIGNED(x) (((x) != 0) && ((x) & ((x)-1)))

/// Helper macro to compute the rounded-up integer division of two unsigned integers.
///
/// Both arguments are parenthesized so that expressions such as
/// `FFX_DIVIDE_ROUNDING_UP(n, a + b)` divide by the whole of `a + b`.
/// `x` is evaluated once and `y` twice.
///
/// @ingroup Utils
#define FFX_DIVIDE_ROUNDING_UP(x, y) (((x) + (y) - 1) / (y))

/// Helper macro to stringify a value.
///
/// FFX_STR goes through FFX_XSTR so that a macro argument is expanded before
/// it is turned into a string; FFX_XSTR stringifies its argument verbatim.
///
/// @ingroup Utils
#define FFX_STR(s) FFX_XSTR(s)
#define FFX_XSTR(s) #s

/// Helper macro to forward declare a structure.
///
/// @ingroup Utils
#define FFX_FORWARD_DECLARE(x) typedef struct x x

/// Helper macro to return the maximum of two values.
///
/// The selected argument is evaluated twice; avoid arguments with side effects.
///
/// @ingroup Utils
#define FFX_MAXIMUM(x, y) (((x) > (y)) ? (x) : (y))

/// Helper macro to return the minimum of two values.
///
/// The selected argument is evaluated twice; avoid arguments with side effects.
///
/// @ingroup Utils
#define FFX_MINIMUM(x, y) (((x) < (y)) ? (x) : (y))

/// Helper macro to do safe free on a pointer.
///
/// Calls `freeFunc(x)` only when `x` is non-null and then resets `x` to
/// nullptr. `x` is evaluated more than once and must be an assignable lvalue.
/// The do/while(false) wrapper makes the macro behave as a single statement.
///
/// @ingroup Utils
#define FFX_SAFE_FREE(x, freeFunc) \
    do                             \
    {                              \
        if (x)                     \
        {                          \
            freeFunc(x);           \
            (x) = nullptr;         \
        }                          \
    } while (false)

/// Helper macro to return the abs of an integer value.
///
/// @ingroup Utils
#define FFX_ABSOLUTE(x) (((x) < 0) ? (-(x)) : (x))

/// Helper macro to return sign of a value.
///
/// Yields -1 for negative values and 1 otherwise (zero maps to 1).
///
/// @ingroup Utils
#define FFX_SIGN(x) (((x) < 0) ? -1 : 1)

/// Helper macro to work out the number of elements in an array.
///
/// Divides the total size of `x` by the size of its first element. The second
/// divisor is `!(sizeof(x) % sizeof(element))`, which is 1 for a whole number
/// of elements and 0 otherwise, so a size that is not a multiple of the
/// element size results in a division by zero instead of a silently wrong
/// count. `0 [x]` is `x[0]` spelled with the operands swapped.
/// The whole expansion is wrapped in parentheses so the cast cannot bind to
/// surrounding operators.
///
/// @ingroup Utils
#define FFX_ARRAY_ELEMENTS(x) ((int32_t)((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x]))))))

/// The maximum length of a path that can be specified to the FidelityFX API.
///
/// @ingroup Utils
#define FFX_MAXIMUM_PATH (260)

/// Helper macro to check if the specified key is set in a bitfield.
///
/// True only when every bit of `key` is set in `options`. `key` is
/// parenthesized so that a combined mask such as `A | B` is tested as a whole;
/// `key` is evaluated twice.
///
/// @ingroup Utils
#define FFX_CONTAINS_FLAG(options, key) (((options) & (key)) == (key))

// Mutex abstraction. Define FFX_MUTEX_IMPL_SHARED or FFX_MUTEX_IMPL_STANDARD
// before this point to select one of the built-in mappings, or provide all
// four FFX_MUTEX_* macros yourself; anything else is rejected by the #error.
#if defined(FFX_MUTEX_IMPL_SHARED)
/// Lock mutex exclusively.
///
/// @ingroup Utils
#define FFX_MUTEX_LOCK(x) (x).lock()
/// Lock mutex for shared access.
///
/// @ingroup Utils
#define FFX_MUTEX_LOCK_SHARED(x) (x).lock_shared()
/// Unlock exclusive mutex lock.
///
/// @ingroup Utils
#define FFX_MUTEX_UNLOCK(x) (x).unlock()
/// Unlock shared mutex lock.
///
/// @ingroup Utils
#define FFX_MUTEX_UNLOCK_SHARED(x) (x).unlock_shared()
#elif defined(FFX_MUTEX_IMPL_STANDARD)
/// Lock mutex exclusively.
///
/// @ingroup Utils
#define FFX_MUTEX_LOCK(x) (x).lock()
/// Lock mutex for shared access (falls back to the exclusive lock).
///
/// @ingroup Utils
#define FFX_MUTEX_LOCK_SHARED(x) FFX_MUTEX_LOCK(x)
/// Unlock exclusive mutex lock.
///
/// @ingroup Utils
#define FFX_MUTEX_UNLOCK(x) (x).unlock()
/// Unlock shared mutex lock (falls back to the exclusive unlock).
///
/// @ingroup Utils
#define FFX_MUTEX_UNLOCK_SHARED(x) FFX_MUTEX_UNLOCK(x)
#elif !defined(FFX_MUTEX_LOCK) || !defined(FFX_MUTEX_LOCK_SHARED) || !defined(FFX_MUTEX_UNLOCK) || !defined(FFX_MUTEX_UNLOCK_SHARED)
#error When using custom mutex you have to provide all following operations too: FFX_MUTEX_LOCK, FFX_MUTEX_LOCK_SHARED, FFX_MUTEX_UNLOCK, FFX_MUTEX_UNLOCK_SHARED!
#endif // #if defined(FFX_MUTEX_IMPL_SHARED)

/// Computes the number of bits set to 1 in an integer.
///
/// @param [in] val Integer mask.
///
/// @return Number of bits set to 1 in provided val.
+/// +/// @ingroup Utils +inline uint8_t ffxCountBitsSet(uint32_t val) noexcept +{ +#if __cplusplus >= 202002L + return static_cast<uint8_t>(std::popcount(val)); +#elif defined(_MSVC_LANG) + return static_cast<uint8_t>(__popcnt(val)); +#elif defined(__GNUC__) || defined(__clang__) + return static_cast<uint8_t>(__builtin_popcount(val)); +#else + uint32_t c = val - ((val >> 1) & 0x55555555); + c = ((c >> 2) & 0x33333333) + (c & 0x33333333); + c = ((c >> 4) + c) & 0x0F0F0F0F; + c = ((c >> 8) + c) & 0x00FF00FF; + return static_cast<uint8_t>(((c >> 16) + c) & 0x0000FFFF); +#endif +} diff --git a/thirdparty/amd-fsr2/shaders/ffx_common_types.h b/thirdparty/amd-ffx/gpu/ffx_common_types.h similarity index 76% rename from thirdparty/amd-fsr2/shaders/ffx_common_types.h rename to thirdparty/amd-ffx/gpu/ffx_common_types.h index ddd17862b61a..2c4f0ba02dfd 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_common_types.h +++ b/thirdparty/amd-ffx/gpu/ffx_common_types.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software.
// @@ -18,6 +19,7 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. + #ifndef FFX_COMMON_TYPES_H #define FFX_COMMON_TYPES_H @@ -25,127 +27,237 @@ #define FFX_PARAMETER_IN #define FFX_PARAMETER_OUT #define FFX_PARAMETER_INOUT +#define FFX_PARAMETER_UNIFORM #elif defined(FFX_HLSL) #define FFX_PARAMETER_IN in #define FFX_PARAMETER_OUT out #define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM uniform #elif defined(FFX_GLSL) #define FFX_PARAMETER_IN in #define FFX_PARAMETER_OUT out #define FFX_PARAMETER_INOUT inout +#define FFX_PARAMETER_UNIFORM const //[cacao_placeholder] until a better fit is found! #endif // #if defined(FFX_CPU) #if defined(FFX_CPU) /// A typedef for a boolean value. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef bool FfxBoolean; /// A typedef for a unsigned 8bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef uint8_t FfxUInt8; /// A typedef for a unsigned 16bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef uint16_t FfxUInt16; /// A typedef for a unsigned 32bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef uint32_t FfxUInt32; /// A typedef for a unsigned 64bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef uint64_t FfxUInt64; /// A typedef for a signed 8bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef int8_t FfxInt8; /// A typedef for a signed 16bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef int16_t FfxInt16; /// A typedef for a signed 32bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef int32_t FfxInt32; /// A typedef for a signed 64bit integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef int64_t FfxInt64; /// A typedef for a floating point value. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef float FfxFloat32; /// A typedef for a 2-dimensional floating point value. 
/// -/// @ingroup CPU +/// @ingroup CPUTypes typedef float FfxFloat32x2[2]; /// A typedef for a 3-dimensional floating point value. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef float FfxFloat32x3[3]; /// A typedef for a 4-dimensional floating point value. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef float FfxFloat32x4[4]; -/// A typedef for a 2-dimensional 32bit unsigned integer. +/// A typedef for a 2x2 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x2x2[4]; + +/// A typedef for a 3x3 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x3[9]; + +/// A typedef for a 3x4 floating point matrix. +/// +/// @ingroup CPUTypes +typedef float FfxFloat32x3x4[12]; + +/// A typedef for a 4x4 floating point matrix. /// -/// @ingroup CPU +/// @ingroup CPUTypes +typedef float FfxFloat32x4x4[16]; + +/// A typedef for a 2-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x2[2]; + +/// A typedef for a 3-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x3[3]; + +/// A typedef for a 4-dimensional 32bit signed integer. +/// +/// @ingroup CPUTypes +typedef int32_t FfxInt32x4[4]; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup CPUTypes typedef uint32_t FfxUInt32x2[2]; /// A typedef for a 3-dimensional 32bit unsigned integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef uint32_t FfxUInt32x3[3]; /// A typedef for a 4-dimensional 32bit unsigned integer. /// -/// @ingroup CPU +/// @ingroup CPUTypes typedef uint32_t FfxUInt32x4[4]; #endif // #if defined(FFX_CPU) #if defined(FFX_HLSL) + +#define FfxFloat32Mat4 matrix <float, 4, 4> +#define FfxFloat32Mat3 matrix <float, 3, 3> + /// A typedef for a boolean value.
/// -/// @ingroup GPU +/// @ingroup HLSLTypes typedef bool FfxBoolean; -#if FFX_HLSL_6_2 +#if FFX_HLSL_SM>=62 + +/// @defgroup HLSL62Types HLSL 6.2 And Above Types +/// HLSL 6.2 and above type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + +/// A typedef for a floating point value. +/// +/// @ingroup HLSL62Types typedef float32_t FfxFloat32; + +/// A typedef for a 2-dimensional floating point value. +/// +/// @ingroup HLSL62Types typedef float32_t2 FfxFloat32x2; + +/// A typedef for a 3-dimensional floating point value. +/// +/// @ingroup HLSL62Types typedef float32_t3 FfxFloat32x3; + +/// A typedef for a 4-dimensional floating point value. +/// +/// @ingroup HLSL62Types typedef float32_t4 FfxFloat32x4; +/// A [cacao_placeholder] typedef for matrix type until confirmed. +typedef float4x4 FfxFloat32x4x4; +typedef float3x4 FfxFloat32x3x4; +typedef float3x3 FfxFloat32x3x3; +typedef float2x2 FfxFloat32x2x2; + /// A typedef for a unsigned 32bit integer. /// -/// @ingroup GPU +/// @ingroup HLSL62Types typedef uint32_t FfxUInt32; + +/// A typedef for a 2-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types typedef uint32_t2 FfxUInt32x2; + +/// A typedef for a 3-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types typedef uint32_t3 FfxUInt32x3; + +/// A typedef for a 4-dimensional 32bit unsigned integer. +/// +/// @ingroup HLSL62Types typedef uint32_t4 FfxUInt32x4; + +/// A typedef for a signed 32bit integer. +/// +/// @ingroup HLSL62Types typedef int32_t FfxInt32; + +/// A typedef for a 2-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types typedef int32_t2 FfxInt32x2; + +/// A typedef for a 3-dimensional signed 32bit integer. +/// +/// @ingroup HLSL62Types typedef int32_t3 FfxInt32x3; + +/// A typedef for a 4-dimensional signed 32bit integer. 
+/// +/// @ingroup HLSL62Types typedef int32_t4 FfxInt32x4; -#else + +#else // #if FFX_HLSL_SM>=62 + +/// @defgroup HLSLBaseTypes HLSL 6.1 And Below Types +/// HLSL 6.1 and below type defines for all commonly used variables +/// +/// @ingroup HLSLTypes + #define FfxFloat32 float #define FfxFloat32x2 float2 #define FfxFloat32x3 float3 #define FfxFloat32x4 float4 +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 float4x4 +#define FfxFloat32x3x4 float3x4 +#define FfxFloat32x3x3 float3x3 +#define FfxFloat32x2x2 float2x2 + /// A typedef for a unsigned 32bit integer. /// /// @ingroup GPU @@ -153,14 +265,18 @@ typedef uint FfxUInt32; typedef uint2 FfxUInt32x2; typedef uint3 FfxUInt32x3; typedef uint4 FfxUInt32x4; + typedef int FfxInt32; typedef int2 FfxInt32x2; typedef int3 FfxInt32x3; typedef int4 FfxInt32x4; -#endif // #if defined(FFX_HLSL_6_2) + +#endif // #if FFX_HLSL_SM>=62 #if FFX_HALF -#if FFX_HLSL_6_2 + +#if FFX_HLSL_SM >= 62 + typedef float16_t FfxFloat16; typedef float16_t2 FfxFloat16x2; typedef float16_t3 FfxFloat16x3; @@ -168,7 +284,7 @@ typedef float16_t4 FfxFloat16x4; /// A typedef for an unsigned 16bit integer. /// -/// @ingroup GPU +/// @ingroup HLSLTypes typedef uint16_t FfxUInt16; typedef uint16_t2 FfxUInt16x2; typedef uint16_t3 FfxUInt16x3; @@ -176,12 +292,12 @@ typedef uint16_t4 FfxUInt16x4; /// A typedef for a signed 16bit integer. /// -/// @ingroup GPU +/// @ingroup HLSLTypes typedef int16_t FfxInt16; typedef int16_t2 FfxInt16x2; typedef int16_t3 FfxInt16x3; typedef int16_t4 FfxInt16x4; -#else +#else // #if FFX_HLSL_SM>=62 typedef min16float FfxFloat16; typedef min16float2 FfxFloat16x2; typedef min16float3 FfxFloat16x3; @@ -189,7 +305,7 @@ typedef min16float4 FfxFloat16x4; /// A typedef for an unsigned 16bit integer. 
/// -/// @ingroup GPU +/// @ingroup HLSLTypes typedef min16uint FfxUInt16; typedef min16uint2 FfxUInt16x2; typedef min16uint3 FfxUInt16x3; @@ -197,19 +313,25 @@ typedef min16uint4 FfxUInt16x4; /// A typedef for a signed 16bit integer. /// -/// @ingroup GPU +/// @ingroup HLSLTypes typedef min16int FfxInt16; typedef min16int2 FfxInt16x2; typedef min16int3 FfxInt16x3; typedef min16int4 FfxInt16x4; -#endif // FFX_HLSL_6_2 +#endif // #if FFX_HLSL_SM>=62 + #endif // FFX_HALF + #endif // #if defined(FFX_HLSL) #if defined(FFX_GLSL) + +#define FfxFloat32Mat4 mat4 +#define FfxFloat32Mat3 mat3 + /// A typedef for a boolean value. /// -/// @ingroup GPU +/// @ingroup GLSLTypes #define FfxBoolean bool #define FfxFloat32 float #define FfxFloat32x2 vec2 @@ -223,6 +345,13 @@ typedef min16int4 FfxInt16x4; #define FfxInt32x2 ivec2 #define FfxInt32x3 ivec3 #define FfxInt32x4 ivec4 + +/// A [cacao_placeholder] typedef for matrix type until confirmed. +#define FfxFloat32x4x4 mat4 +#define FfxFloat32x3x4 mat4x3 +#define FfxFloat32x3x3 mat3 +#define FfxFloat32x2x2 mat2 + #if FFX_HALF #define FfxFloat16 float16_t #define FfxFloat16x2 f16vec2 @@ -241,11 +370,11 @@ typedef min16int4 FfxInt16x4; // Global toggles: // #define FFX_HALF (1) -// #define FFX_HLSL_6_2 (1) +// #define FFX_HLSL_SM (62) #if FFX_HALF -#if FFX_HLSL_6_2 +#if FFX_HLSL_SM >= 62 #define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef BaseComponentType##16_t TypeName; #define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName; @@ -255,7 +384,7 @@ typedef min16int4 FfxInt16x4; #define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<BaseComponentType##16_t, COL> TypeName; #define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName; -#else //FFX_HLSL_6_2 +#else //FFX_HLSL_SM>=62 #define FFX_MIN16_SCALAR( TypeName, BaseComponentType ) typedef min16##BaseComponentType TypeName; #define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ) typedef vector<min16##BaseComponentType, COL> TypeName; @@ -265,7
+394,7 @@ typedef min16int4 FfxInt16x4; #define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL ) FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL ); #define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ); -#endif //FFX_HLSL_6_2 +#endif //FFX_HLSL_SM>=62 #else //FFX_HALF diff --git a/thirdparty/amd-fsr2/shaders/ffx_core.h b/thirdparty/amd-ffx/gpu/ffx_core.h similarity index 61% rename from thirdparty/amd-fsr2/shaders/ffx_core.h rename to thirdparty/amd-ffx/gpu/ffx_core.h index 4e687d6e3d6b..d1ed14419b78 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core.h +++ b/thirdparty/amd-ffx/gpu/ffx_core.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,17 +20,39 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-/// @defgroup Core -/// @defgroup HLSL -/// @defgroup GLSL -/// @defgroup GPU -/// @defgroup CPU -/// @defgroup CAS -/// @defgroup FSR1 +/// @defgroup FfxGPU GPU +/// The FidelityFX SDK GPU References +/// +/// @ingroup ffxSDK + +/// @defgroup FfxHLSL HLSL References +/// FidelityFX SDK HLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGLSL GLSL References +/// FidelityFX SDK GLSL GPU References +/// +/// @ingroup FfxGPU + +/// @defgroup FfxGPUEffects FidelityFX GPU References +/// FidelityFX Effect GPU Reference Documentation +/// +/// @ingroup FfxGPU + +/// @defgroup GPUCore GPU Core +/// GPU defines and functions +/// +/// @ingroup FfxGPU #if !defined(FFX_CORE_H) #define FFX_CORE_H +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler + #include "ffx_common_types.h" #if defined(FFX_CPU) @@ -49,4 +72,9 @@ #include "ffx_core_gpu_common_half.h" #include "ffx_core_portability.h" #endif // #if defined(FFX_GPU) -#endif // #if !defined(FFX_CORE_H) \ No newline at end of file + +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#endif // #if !defined(FFX_CORE_H) diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h b/thirdparty/amd-ffx/gpu/ffx_core_cpu.h similarity index 89% rename from thirdparty/amd-fsr2/shaders/ffx_core_cpu.h rename to thirdparty/amd-ffx/gpu/ffx_core_cpu.h index 3bf0295bfc62..4b6c41aa5b28 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h +++ b/thirdparty/amd-ffx/gpu/ffx_core_cpu.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -21,21 +22,26 @@ /// A define for a true value in a boolean expression. /// -/// @ingroup CPU +/// @ingroup CPUTypes #define FFX_TRUE (1) /// A define for a false value in a boolean expression. /// -/// @ingroup CPU +/// @ingroup CPUTypes #define FFX_FALSE (0) #if !defined(FFX_STATIC) /// A define to abstract declaration of static variables and functions. /// -/// @ingroup CPU +/// @ingroup CPUTypes #define FFX_STATIC static #endif // #if !defined(FFX_STATIC) +/// @defgroup CPUCore CPU Core +/// Core CPU-side defines and functions +/// +/// @ingroup ffxHost + #ifdef __clang__ #pragma clang diagnostic ignored "-Wunused-variable" #endif @@ -47,7 +53,7 @@ /// @returns /// An unsigned 32bit integer value containing the bit pattern of x. /// -/// @ingroup CPU +/// @ingroup CPUCore FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x) { union @@ -89,7 +95,7 @@ FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) /// @returns /// A linearly interpolated value between x and y according to t. 
/// -/// @ingroup CPU +/// @ingroup CPUCore FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) { return y * t + (-x * t + x); @@ -102,10 +108,10 @@ FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) /// @returns /// The reciprocal value of x. /// -/// @ingroup CPU -FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a) +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 x) { - return 1.0f / a; + return 1.0f / x; } /// Compute the square root of a value. @@ -115,13 +121,13 @@ FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a) /// @returns /// The the square root of x. /// -/// @ingroup CPU +/// @ingroup CPUCore FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x) { - return sqrt(x); + return FfxFloat32(sqrt(x)); } -FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +FFX_STATIC FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) { return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); } @@ -135,10 +141,10 @@ FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) /// @returns /// The fractional part of x. /// -/// @ingroup CPU -FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a) +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 x) { - return a - floor(a); + return x - FfxFloat32(floor(x)); } /// Compute the reciprocal square root of a value. @@ -148,10 +154,10 @@ FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a) /// @returns /// The reciprocal square root value of x. /// -/// @ingroup CPU -FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a) +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxRsqrt(FfxFloat32 x) { - return ffxReciprocal(ffxSqrt(a)); + return ffxReciprocal(ffxSqrt(x)); } FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) @@ -181,16 +187,16 @@ FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) /// @returns /// The clamped version of x. 
/// -/// @ingroup CPU -FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a) +/// @ingroup CPUCore +FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 x) { - return ffxMin(1.0f, ffxMax(0.0f, a)); + return ffxMin(1.0f, ffxMax(0.0f, x)); } //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +FFX_STATIC void ffxOpAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) { d[0] = a[0] + b; d[1] = a[1] + b; @@ -198,7 +204,7 @@ FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) return; } -FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +FFX_STATIC void ffxOpACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) { d[0] = a[0]; d[1] = a[1]; @@ -206,7 +212,7 @@ FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) return; } -FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +FFX_STATIC void ffxOpAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) { d[0] = a[0] * b[0]; d[1] = a[1] * b[1]; @@ -214,7 +220,7 @@ FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) return; } -FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +FFX_STATIC void ffxOpAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) { d[0] = a[0] * b; d[1] = a[1] * b; @@ -222,7 +228,7 @@ FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) return; } -FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +FFX_STATIC void ffxOpARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) { d[0] = ffxReciprocal(a[0]); d[1] = ffxReciprocal(a[1]); @@ -245,8 +251,8 @@ FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) /// @returns /// The closest 16bit floating point value to f. 
/// -/// @ingroup CPU -FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f) +/// @ingroup CPUCore +FFX_STATIC FfxUInt32 ffxF32ToF16(FfxFloat32 f) { static FfxUInt16 base[512] = { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -320,13 +326,13 @@ FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f) /// point representation, and then stores the X and Y components in the lower and upper 16 bits of the /// 32bit unsigned integer respectively. /// -/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// @param [in] x A 2-dimensional floating point value to convert and pack. /// /// @returns /// A packed 32bit value containing 2 16bit floating point values. /// -/// @ingroup CPU -FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a) +/// @ingroup CPUCore +FFX_STATIC FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 x) { - return f32tof16(a[0]) + (f32tof16(a[1]) << 16); + return ffxF32ToF16(x[0]) + (ffxF32ToF16(x[1]) << 16); } diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h b/thirdparty/amd-ffx/gpu/ffx_core_glsl.h similarity index 81% rename from thirdparty/amd-fsr2/shaders/ffx_core_glsl.h rename to thirdparty/amd-ffx/gpu/ffx_core_glsl.h index 6ec58f3c625c..c8dccacdbc44 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h +++ b/thirdparty/amd-ffx/gpu/ffx_core_glsl.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,180 +20,230 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. +/// @defgroup GLSLCore GLSL Core +/// GLSL core defines and functions +/// +/// @ingroup FfxGLSL + +/// A define for abstracting select functionality for pre/post HLSL 21 +/// +/// @ingroup GLSLCore +#define FFX_SELECT(cond, arg1, arg2) cond ? arg1 : arg2 + /// A define for abstracting shared memory between shading languages. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_GROUPSHARED shared /// A define for abstracting compute memory barriers between shading languages. /// -/// @ingroup GPU -#define FFX_GROUP_MEMORY_BARRIER() barrier() +/// @ingroup GLSLCore +#define FFX_GROUP_MEMORY_BARRIER groupMemoryBarrier(); barrier() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_ADD(x, y) atomicAdd(x, y) + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_ADD_RETURN(x, y, r) r = atomicAdd(x, y) + +/// A define for abstracting compute atomic OR between shading languages. 
+/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_OR(x, y) atomicOr(x, y) + +/// A define for abstracting compute atomic min between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_MIN(x, y) atomicMin(x, y) + +/// A define for abstracting compute atomic max between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_ATOMIC_MAX(x, y) atomicMax(x, y) /// A define added to accept static markup on functions to aid CPU/GPU portability of code. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_STATIC /// A define for abstracting loop unrolling between shading languages. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_UNROLL /// A define for abstracting a 'greater than' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_GREATER_THAN(x, y) greaterThan(x, y) /// A define for abstracting a 'greater than or equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y) /// A define for abstracting a 'less than' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_LESS_THAN(x, y) lessThan(x, y) /// A define for abstracting a 'less than or equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y) /// A define for abstracting an 'equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_EQUAL(x, y) equal(x, y) /// A define for abstracting a 'not equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_NOT_EQUAL(x, y) notEqual(x, y) +/// A define for abstracting matrix multiply operations between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) (a * b) + +/// A define for abstracting vector transformations between shading languages. 
+/// +/// @ingroup GLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) (a * b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup GLSLCore +#define FFX_MODULO(a, b) (mod(a, b)) + /// Broadcast a scalar value to a 1-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) /// Broadcast a scalar value to a 2-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x)) /// Broadcast a scalar value to a 3-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x)) /// Broadcast a scalar value to a 4-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x)) /// Broadcast a scalar value to a 1-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_UINT32(x) FfxUInt32(x) /// Broadcast a scalar value to a 2-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x)) /// Broadcast a scalar value to a 3-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x)) /// Broadcast a scalar value to a 4-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x)) /// Broadcast a scalar value to a 1-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_INT32(x) FfxInt32(x) /// Broadcast a scalar value to a 2-dimensional signed integer vector. 
/// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x)) /// Broadcast a scalar value to a 3-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x)) /// Broadcast a scalar value to a 4-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x)) /// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_FLOAT16(x) FFX_MIN16_F(x) /// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x)) /// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x)) /// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x)) /// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_UINT16(x) FFX_MIN16_U(x) /// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x)) /// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x)) /// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. 
/// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x)) /// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_INT16(x) FFX_MIN16_I(x) /// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x)) /// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x)) /// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup GLSLCore #define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x)) + #extension GL_EXT_shader_explicit_arithmetic_types : require #if !defined(FFX_SKIP_EXT) #if FFX_HALF #extension GL_EXT_shader_16bit_storage : require - #extension GL_EXT_shader_explicit_arithmetic_types : require #endif // FFX_HALF #if defined(FFX_LONG) @@ -216,12 +267,12 @@ FfxFloat32x4 ffxSqrt(FfxFloat32x4 x); /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32 ffxAsFloat(FfxUInt32 x) { return uintBitsToFloat(x); @@ -229,12 +280,12 @@ FfxFloat32 ffxAsFloat(FfxUInt32 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) { return uintBitsToFloat(x); @@ -242,12 +293,12 @@ FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) { return uintBitsToFloat(x); @@ -255,12 +306,12 @@ FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) { return uintBitsToFloat(x); @@ -268,12 +319,12 @@ FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32 ffxAsUInt32(FfxFloat32 x) { return floatBitsToUint(x); @@ -281,12 +332,12 @@ FfxUInt32 ffxAsUInt32(FfxFloat32 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) { return floatBitsToUint(x); @@ -294,12 +345,12 @@ FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) { return floatBitsToUint(x); @@ -307,26 +358,43 @@ FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) { return floatBitsToUint(x); } +/// Pack 2x32-bit floating point values in a single 32bit value. +/// +/// This function first converts each component of value into their nearest 16-bit floating +/// point representation, and then stores the X and Y components in the lower and upper 16 bits of the +/// 32bit unsigned integer respectively. +/// +/// @param [in] value A 2-dimensional floating point value to convert and pack. +/// +/// @returns +/// A packed 32bit value containing 2 16bit floating point values. +/// +/// @ingroup GLSLCore +FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value) +{ + return packHalf2x16(value); +} + /// Convert a 32bit IEEE 754 floating point value to its nearest 16bit equivalent. /// /// @param [in] value The value to convert. -/// +/// /// @returns /// The nearest 16bit equivalent of value. -/// -/// @ingroup GLSL -FfxUInt32 f32tof16(FfxFloat32 value) +/// +/// @ingroup GLSLCore +FfxUInt32 ffxF32ToF16(FfxFloat32 value) { return packHalf2x16(FfxFloat32x2(value, 0.0)); } @@ -338,7 +406,7 @@ FfxUInt32 f32tof16(FfxFloat32 value) /// @returns /// A 2-dimensional floating point vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) { return FfxFloat32x2(value, value); @@ -351,7 +419,7 @@ FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) /// @returns /// A 3-dimensional floating point vector with value in each component. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) { return FfxFloat32x3(value, value, value); @@ -364,7 +432,7 @@ FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) /// @returns /// A 4-dimensional floating point vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) { return FfxFloat32x4(value, value, value, value); @@ -377,7 +445,7 @@ FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) /// @returns /// A 2-dimensional signed integer vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32x2 ffxBroadcast2(FfxInt32 value) { return FfxInt32x2(value, value); @@ -390,7 +458,7 @@ FfxInt32x2 ffxBroadcast2(FfxInt32 value) /// @returns /// A 3-dimensional signed integer vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32x3 ffxBroadcast3(FfxInt32 value) { return FfxInt32x3(value, value, value); @@ -403,7 +471,7 @@ FfxInt32x3 ffxBroadcast3(FfxInt32 value) /// @returns /// A 4-dimensional signed integer vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32x4 ffxBroadcast4(FfxInt32 value) { return FfxInt32x4(value, value, value, value); @@ -416,7 +484,7 @@ FfxInt32x4 ffxBroadcast4(FfxInt32 value) /// @returns /// A 2-dimensional unsigned integer vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) { return FfxUInt32x2(value, value); @@ -429,7 +497,7 @@ FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) /// @returns /// A 3-dimensional unsigned integer vector with value in each component. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) { return FfxUInt32x3(value, value, value); @@ -442,7 +510,7 @@ FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) /// @returns /// A 4-dimensional unsigned integer vector with value in each component. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) { return FfxUInt32x4(value, value, value, value); @@ -450,16 +518,16 @@ FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) /// /// -/// @ingroup GLSL -FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +/// @ingroup GLSLCore +FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) { return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits)); } /// /// -/// @ingroup GLSL -FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +/// @ingroup GLSLCore +FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) { return (ins & mask) | (src & (~mask)); } @@ -467,8 +535,8 @@ FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<x and y according to t. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) { return mix(x, y, t); @@ -507,7 +575,7 @@ FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) { return mix(x, y, t); @@ -527,7 +595,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) { return mix(x, y, t); @@ -547,7 +615,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) /// @returns /// A linearly interpolated value between x and y according to t. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) { return mix(x, y, t); @@ -567,7 +635,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) { return mix(x, y, t); @@ -587,7 +655,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) { return mix(x, y, t); @@ -607,7 +675,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) { return mix(x, y, t); @@ -625,7 +693,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return max(x, max(y, z)); @@ -643,7 +711,7 @@ FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return max(x, max(y, z)); @@ -661,7 +729,7 @@ FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The maximum value of x, y, and z. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return max(x, max(y, z)); @@ -679,7 +747,7 @@ FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return max(x, max(y, z)); @@ -697,7 +765,7 @@ FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) { return max(x, max(y, z)); @@ -715,7 +783,7 @@ FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) { return max(x, max(y, z)); @@ -733,7 +801,7 @@ FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) { return max(x, max(y, z)); @@ -751,7 +819,7 @@ FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) { return max(x, max(y, z)); @@ -769,7 +837,7 @@ FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return max(min(x, y), min(max(x, y), z)); @@ -787,7 +855,7 @@ FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The median value of x, y, and z. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return max(min(x, y), min(max(x, y), z)); @@ -805,7 +873,7 @@ FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return max(min(x, y), min(max(x, y), z)); @@ -823,7 +891,7 @@ FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return max(min(x, y), min(max(x, y), z)); @@ -841,7 +909,7 @@ FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) { return max(min(x, y), min(max(x, y), z)); @@ -859,7 +927,7 @@ FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) { return max(min(x, y), min(max(x, y), z)); @@ -877,7 +945,7 @@ FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) { return max(min(x, y), min(max(x, y), z)); @@ -895,7 +963,7 @@ FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z) /// @returns /// The median value of x, y, and z. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) { return max(min(x, y), min(max(x, y), z)); @@ -914,7 +982,7 @@ FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return min(x, min(y, z)); @@ -932,7 +1000,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return min(x, min(y, z)); @@ -950,7 +1018,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return min(x, min(y, z)); @@ -968,7 +1036,7 @@ FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return min(x, min(y, z)); @@ -986,7 +1054,7 @@ FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) { return min(x, min(y, z)); @@ -1004,7 +1072,7 @@ FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) { return min(x, min(y, z)); @@ -1022,7 +1090,7 @@ FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) /// @returns /// The minimum value of x, y, and z. 
/// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) { return min(x, min(y, z)); @@ -1040,7 +1108,7 @@ FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) { return min(x, min(y, z)); @@ -1054,9 +1122,9 @@ FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) /// /// @returns /// The reciprocal value of x. -/// -/// @ingroup GLSL -FfxFloat32 rcp(FfxFloat32 x) +/// +/// @ingroup GLSLCore +FfxFloat32 ffxReciprocal(FfxFloat32 x) { return FfxFloat32(1.0) / x; } @@ -1070,8 +1138,8 @@ FfxFloat32 rcp(FfxFloat32 x) /// @returns /// The reciprocal value of x. /// -/// @ingroup GLSL -FfxFloat32x2 rcp(FfxFloat32x2 x) +/// @ingroup GLSLCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x) { return ffxBroadcast2(1.0) / x; } @@ -1085,8 +1153,8 @@ FfxFloat32x2 rcp(FfxFloat32x2 x) /// @returns /// The reciprocal value of x. /// -/// @ingroup GLSL -FfxFloat32x3 rcp(FfxFloat32x3 x) +/// @ingroup GLSLCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x) { return ffxBroadcast3(1.0) / x; } @@ -1100,8 +1168,8 @@ FfxFloat32x3 rcp(FfxFloat32x3 x) /// @returns /// The reciprocal value of x. /// -/// @ingroup GLSL -FfxFloat32x4 rcp(FfxFloat32x4 x) +/// @ingroup GLSLCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x) { return ffxBroadcast4(1.0) / x; } @@ -1115,8 +1183,8 @@ FfxFloat32x4 rcp(FfxFloat32x4 x) /// @returns /// The reciprocal square root value of x. /// -/// @ingroup GLSL -FfxFloat32 rsqrt(FfxFloat32 x) +/// @ingroup GLSLCore +FfxFloat32 ffxRsqrt(FfxFloat32 x) { return FfxFloat32(1.0) / ffxSqrt(x); } @@ -1130,8 +1198,8 @@ FfxFloat32 rsqrt(FfxFloat32 x) /// @returns /// The reciprocal square root value of x. 
/// -/// @ingroup GLSL -FfxFloat32x2 rsqrt(FfxFloat32x2 x) +/// @ingroup GLSLCore +FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x) { return ffxBroadcast2(1.0) / ffxSqrt(x); } @@ -1145,8 +1213,8 @@ FfxFloat32x2 rsqrt(FfxFloat32x2 x) /// @returns /// The reciprocal square root value of x. /// -/// @ingroup GLSL -FfxFloat32x3 rsqrt(FfxFloat32x3 x) +/// @ingroup GLSLCore +FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x) { return ffxBroadcast3(1.0) / ffxSqrt(x); } @@ -1160,7 +1228,7 @@ FfxFloat32x3 rsqrt(FfxFloat32x3 x) /// @returns /// The reciprocal square root value of x. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 rsqrt(FfxFloat32x4 x) { return ffxBroadcast4(1.0) / ffxSqrt(x); @@ -1173,7 +1241,7 @@ FfxFloat32x4 rsqrt(FfxFloat32x4 x) /// @returns /// The clamped version of x. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32 ffxSaturate(FfxFloat32 x) { return clamp(x, FfxFloat32(0.0), FfxFloat32(1.0)); @@ -1186,7 +1254,7 @@ FfxFloat32 ffxSaturate(FfxFloat32 x) /// @returns /// The clamped version of x. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) { return clamp(x, ffxBroadcast2(0.0), ffxBroadcast2(1.0)); @@ -1199,7 +1267,7 @@ FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) /// @returns /// The clamped version of x. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) { return clamp(x, ffxBroadcast3(0.0), ffxBroadcast3(1.0)); @@ -1212,7 +1280,7 @@ FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) /// @returns /// The clamped version of x. /// -/// @ingroup GLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) { return clamp(x, ffxBroadcast4(0.0), ffxBroadcast4(1.0)); @@ -1231,7 +1299,7 @@ FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup GLSLCore FfxFloat32 ffxFract(FfxFloat32 x) { return fract(x); @@ -1250,7 +1318,7 @@ FfxFloat32 ffxFract(FfxFloat32 x) /// @returns /// The fractional part of x. 
/// -/// @ingroup HLSL +/// @ingroup GLSLCore FfxFloat32x2 ffxFract(FfxFloat32x2 x) { return fract(x); @@ -1269,7 +1337,7 @@ FfxFloat32x2 ffxFract(FfxFloat32x2 x) /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup GLSLCore FfxFloat32x3 ffxFract(FfxFloat32x3 x) { return fract(x); @@ -1288,21 +1356,131 @@ FfxFloat32x3 ffxFract(FfxFloat32x3 x) /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup GLSLCore FfxFloat32x4 ffxFract(FfxFloat32x4 x) { return fract(x); } -FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup GLSLCore +FfxFloat32 ffxRound(FfxFloat32 x) +{ + return roundEven(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup GLSLCore +FfxFloat32x2 ffxRound(FfxFloat32x2 x) +{ + return roundEven(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. +/// +/// @ingroup GLSLCore +FfxFloat32x3 ffxRound(FfxFloat32x3 x) +{ + return roundEven(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from 2 integer. 
+/// +/// @ingroup GLSLCore +FfxFloat32x4 ffxRound(FfxFloat32x4 x) +{ + return roundEven(x); +} + +FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) { return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); } -#if FFX_HALF +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + return packHalf2x16(v); +} +FfxFloat32x2 ffxUnpackF32(FfxUInt32 u){ + return unpackHalf2x16(u); +} + +FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ + return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw)); +} + +FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ + return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = sign(v); + FfxFloat32 s2 = s*s; + return s2/(v + s2 - 1.0); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = sign(v); + FfxFloat32x2 s2 = s*s; + return s2/(v + s2 - FfxFloat32x2(1.0, 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = sign(v); + FfxFloat32x3 s2 = s*s; + return s2/(v + s2 - FfxFloat32x3(1.0, 1.0, 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. 
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = sign(v); + FfxFloat32x4 s2 = s*s; + return s2/(v + s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0)); +} +#if FFX_HALF #define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x)) +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + return packHalf2x16(v); +} + +FfxFloat16x2 ffxUnpackF16(FfxUInt32 u){ + return FfxFloat16x2(unpackHalf2x16(u)); +} + FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) { return FfxFloat16x4(unpackFloat2x16(x.x), unpackFloat2x16(x.y)); @@ -1473,40 +1651,6 @@ FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a) return mix(x, y, a); } //------------------------------------------------------------------------------------------------------------------------------ -// No packed version of ffxMid3. -FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z) -{ - return max(min(x, y), min(max(x, y), z)); -} -//------------------------------------------------------------------------------------------------------------------------------ // No packed version of ffxMax3. 
FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) { @@ -1543,6 +1687,23 @@ FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) return min(x, min(y, z)); } //------------------------------------------------------------------------------------------------------------------------------ +FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) +{ + return max(min(x, y), min(max(x, y), z)); +} +//------------------------------------------------------------------------------------------------------------------------------ FfxFloat16 ffxReciprocalHalf(FfxFloat16 x) { return FFX_BROADCAST_FLOAT16(1.0) / x; @@ -1614,38 +1775,124 @@ FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b) #if defined(FFX_WAVE) // Where 'x' must be a compile time literal. 
-FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) +FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) +FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x) +FfxUInt32x2 ffxWaveXorU2(FfxUInt32x2 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x) +FfxUInt32x3 ffxWaveXorU3(FfxUInt32x3 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } -FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x) +FfxUInt32x4 ffxWaveXorU4(FfxUInt32x4 v, FfxUInt32 x) { return subgroupShuffleXor(v, x); } +FfxBoolean ffxWaveIsFirstLane() +{ + return subgroupElect(); +} +FfxUInt32 ffxWaveLaneIndex() +{ + return gl_SubgroupInvocationID; +} +FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x ) +{ + return subgroupShuffle(v, x); +} +FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) +{ + return subgroupBallotExclusiveBitCount(subgroupBallot(v)); +} +FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) +{ + return subgroupBallotBitCount(subgroupBallot(v)); +} +FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) +{ + return subgroupBroadcastFirst(v); +} +FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) +{ + return subgroupBroadcastFirst(v); +} +FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) +{ + return subgroupBroadcastFirst(v); +} +FfxUInt32 ffxWaveOr(FfxUInt32 a) +{ + return subgroupOr(a); +} +FfxUInt32 ffxWaveMin(FfxUInt32 a) +{ + return 
subgroupMin(a); +} +FfxFloat32 ffxWaveMin(FfxFloat32 a) +{ + return subgroupMin(a); +} +FfxUInt32 ffxWaveMax(FfxUInt32 a) +{ + return subgroupMax(a); +} +FfxFloat32 ffxWaveMax(FfxFloat32 a) +{ + return subgroupMax(a); +} +FfxUInt32 ffxWaveSum(FfxUInt32 a) +{ + return subgroupAdd(a); +} +FfxFloat32 ffxWaveSum(FfxFloat32 a) +{ + return subgroupAdd(a); +} +FfxUInt32 ffxWaveLaneCount() +{ + return gl_SubgroupSize; +} +#if defined(FFX_WAVE_ALL_TRUE) +FfxBoolean ffxWaveAllTrue(FfxBoolean v) +{ + return subgroupAll(v); +} +#endif +FfxFloat32 ffxQuadReadX(FfxFloat32 v) +{ + return subgroupQuadSwapHorizontal(v); +} +FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) +{ + return subgroupQuadSwapHorizontal(v); +} +FfxFloat32 ffxQuadReadY(FfxFloat32 v) +{ + return subgroupQuadSwapVertical(v); +} +FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) +{ + return subgroupQuadSwapVertical(v); +} //------------------------------------------------------------------------------------------------------------------------------ #if FFX_HALF diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common.h similarity index 89% rename from thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h rename to thirdparty/amd-ffx/gpu/ffx_core_gpu_common.h index ae07642f0df3..da03b07aaf99 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h +++ b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -21,81 +22,34 @@ /// A define for a true value in a boolean expression. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_TRUE (true) /// A define for a false value in a boolean expression. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_FALSE (false) /// A define value for positive infinity. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u) /// A define value for negative infinity. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u) /// A define value for PI. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_PI (3.14159) +FFX_STATIC const FfxFloat32 FFX_FP16_MIN = 6.10e-05f; +FFX_STATIC const FfxFloat32 FFX_FP16_MAX = 65504.0f; +FFX_STATIC const FfxFloat32 FFX_TONEMAP_EPSILON = 1.0f / FFX_FP16_MAX; -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPU -FfxFloat32 ffxReciprocal(FfxFloat32 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. 
-/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPU -FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPU -FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value) -{ - return rcp(value); -} - -/// Compute the reciprocal of value. -/// -/// @param [in] value The value to compute the reciprocal of. -/// -/// @returns -/// The 1 / value. -/// -/// @ingroup GPU -FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) -{ - return rcp(value); -} +#define FFX_HAS_FLAG(v, f) ((v & f) == f) /// Compute the min of two values. /// @@ -105,7 +59,7 @@ FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) { return min(x, y); @@ -119,7 +73,7 @@ FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) { return min(x, y); @@ -133,7 +87,7 @@ FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) { return min(x, y); @@ -147,7 +101,7 @@ FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) { return min(x, y); @@ -161,7 +115,7 @@ FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y) /// @returns /// The the lowest of two values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) { return min(x, y); @@ -175,7 +129,7 @@ FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) { return min(x, y); @@ -189,7 +143,7 @@ FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) { return min(x, y); @@ -203,7 +157,7 @@ FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) { return min(x, y); @@ -217,7 +171,7 @@ FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) { return min(x, y); @@ -231,7 +185,7 @@ FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) { return min(x, y); @@ -245,7 +199,7 @@ FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) { return min(x, y); @@ -259,7 +213,7 @@ FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) { return min(x, y); @@ -273,7 +227,7 @@ FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y) /// @returns /// The the lowest of two values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) { return max(x, y); @@ -287,7 +241,7 @@ FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) { return max(x, y); @@ -301,7 +255,7 @@ FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) { return max(x, y); @@ -315,7 +269,7 @@ FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) { return max(x, y); @@ -329,7 +283,7 @@ FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) { return max(x, y); @@ -343,7 +297,7 @@ FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) { return max(x, y); @@ -357,7 +311,7 @@ FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) { return max(x, y); @@ -371,7 +325,7 @@ FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) { return max(x, y); @@ -385,7 +339,7 @@ FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y) /// @returns /// The the lowest of two values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) { return max(x, y); @@ -399,7 +353,7 @@ FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) { return max(x, y); @@ -413,7 +367,7 @@ FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) { return max(x, y); @@ -427,7 +381,7 @@ FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) { return max(x, y); @@ -441,7 +395,7 @@ FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) { return pow(x, y); @@ -455,7 +409,7 @@ FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) { return pow(x, y); @@ -469,7 +423,7 @@ FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) { return pow(x, y); @@ -483,7 +437,7 @@ FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) { return pow(x, y); @@ -496,7 +450,7 @@ FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y) /// @returns /// The the square root of x. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxSqrt(FfxFloat32 x) { return sqrt(x); @@ -509,7 +463,7 @@ FfxFloat32 ffxSqrt(FfxFloat32 x) /// @returns /// The the square root of x. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) { return sqrt(x); @@ -522,7 +476,7 @@ FfxFloat32x2 ffxSqrt(FfxFloat32x2 x) /// @returns /// The the square root of x. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) { return sqrt(x); @@ -535,7 +489,7 @@ FfxFloat32x3 ffxSqrt(FfxFloat32x3 x) /// @returns /// The the square root of x. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) { return sqrt(x); @@ -549,7 +503,7 @@ FfxFloat32x4 ffxSqrt(FfxFloat32x4 x) /// @returns /// The value of d with the sign bit from s. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) { return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u))); @@ -563,7 +517,7 @@ FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s) /// @returns /// The value of d with the sign bit from s. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) { return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u))); @@ -577,7 +531,7 @@ FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s) /// @returns /// The value of d with the sign bit from s. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) { return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u))); @@ -591,7 +545,7 @@ FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s) /// @returns /// The value of d with the sign bit from s. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) { return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u))); @@ -614,7 +568,7 @@ FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxIsSigned(FfxFloat32 m) { return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -637,7 +591,7 @@ FfxFloat32 ffxIsSigned(FfxFloat32 m) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) { return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -660,7 +614,7 @@ FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) { return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -683,7 +637,7 @@ FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or positive. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) { return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -701,7 +655,7 @@ FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) { return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); @@ -719,7 +673,7 @@ FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) { return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); @@ -737,7 +691,7 @@ FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) { return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); @@ -755,7 +709,7 @@ FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) { return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); @@ -775,10 +729,10 @@ FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m) /// @returns /// The sortable integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) { - return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); + return value ^ ((ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); } /// Convert a sortable integer to a 32bit floating point value. @@ -792,10 +746,10 @@ FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value) /// @returns /// The sortable integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) { - return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); + return value ^ ((~ffxAShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000)); } /// Calculate a low-quality approximation for the square root of a value. @@ -811,10 +765,10 @@ FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value) /// @returns /// An approximation of the square root, estimated to low quality. 
/// -/// @ingroup GPU -FfxFloat32 ffxApproximateSqrt(FfxFloat32 a) +/// @ingroup GPUCore +FfxFloat32 ffxApproximateSqrt(FfxFloat32 value) { - return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); + return ffxAsFloat((ffxAsUInt32(value) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639)); } /// Calculate a low-quality approximation for the reciprocal of a value. @@ -830,10 +784,10 @@ FfxFloat32 ffxApproximateSqrt(FfxFloat32 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a) +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocal(FfxFloat32 value) { - return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(a)); + return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(value)); } /// Calculate a medium-quality approximation for the reciprocal of a value. @@ -849,7 +803,7 @@ FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) { FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value)); @@ -869,10 +823,10 @@ FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value) /// @returns /// An approximation of the reciprocal square root, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a) +/// @ingroup GPUCore +FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 value) { - return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(a) >> FfxUInt32(1))); + return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(value) >> FfxUInt32(1))); } /// Calculate a low-quality approximation for the square root of a value. @@ -888,10 +842,10 @@ FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a) /// @returns /// An approximation of the square root, estimated to low quality. 
/// -/// @ingroup GPU -FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a) +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 value) { - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u)); } /// Calculate a low-quality approximation for the reciprocal of a value. @@ -907,10 +861,10 @@ FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a) +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 value) { - return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(a)); + return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(value)); } /// Calculate a medium-quality approximation for the reciprocal of a value. @@ -926,11 +880,11 @@ FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. /// -/// @ingroup GPU -FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a) +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 value) { - FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(a)); - return b * (-b * a + ffxBroadcast2(2.0f)); + FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast2(2.0f)); } /// Calculate a low-quality approximation for the square root of a value. @@ -946,10 +900,10 @@ FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a) /// @returns /// An approximation of the square root, estimated to low quality. 
/// -/// @ingroup GPU -FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a) +/// @ingroup GPUCore +FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 value) { - return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast2(1u))); + return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast2(1u))); } /// Calculate a low-quality approximation for the square root of a value. @@ -965,10 +919,10 @@ FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a) /// @returns /// An approximation of the square root, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a) +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 value) { - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u)); } /// Calculate a low-quality approximation for the reciprocal of a value. @@ -984,10 +938,10 @@ FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a) +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 value) { - return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(a)); + return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(value)); } /// Calculate a medium-quality approximation for the reciprocal of a value. @@ -1003,11 +957,11 @@ FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. 
/// -/// @ingroup GPU -FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a) +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 value) { - FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(a)); - return b * (-b * a + ffxBroadcast3(2.0f)); + FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast3(2.0f)); } /// Calculate a low-quality approximation for the square root of a value. @@ -1023,10 +977,10 @@ FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a) /// @returns /// An approximation of the square root, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a) +/// @ingroup GPUCore +FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 value) { - return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast3(1u))); + return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast3(1u))); } /// Calculate a low-quality approximation for the square root of a value. @@ -1042,10 +996,10 @@ FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a) /// @returns /// An approximation of the square root, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a) +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 value) { - return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); + return ffxAsFloat((ffxAsUInt32(value) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u)); } /// Calculate a low-quality approximation for the reciprocal of a value. @@ -1061,10 +1015,10 @@ FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. 
/// -/// @ingroup GPU -FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a) +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 value) { - return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(a)); + return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(value)); } /// Calculate a medium-quality approximation for the reciprocal of a value. @@ -1080,11 +1034,11 @@ FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. /// -/// @ingroup GPU -FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a) +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 value) { - FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(a)); - return b * (-b * a + ffxBroadcast4(2.0f)); + FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(value)); + return b * (-b * value + ffxBroadcast4(2.0f)); } /// Calculate a low-quality approximation for the square root of a value. @@ -1100,10 +1054,10 @@ FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a) /// @returns /// An approximation of the square root, estimated to low quality. /// -/// @ingroup GPU -FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a) +/// @ingroup GPUCore +FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 value) { - return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast4(1u))); + return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(value) >> ffxBroadcast4(1u))); } /// Calculate dot product of 'a' and 'b'. @@ -1114,7 +1068,7 @@ FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a) /// @returns /// The value of a dot b. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) { return dot(a, b); @@ -1128,7 +1082,7 @@ FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) /// @returns /// The value of a dot b. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) { return dot(a, b); @@ -1142,7 +1096,7 @@ FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) /// @returns /// The value of a dot b. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) { return dot(a, b); @@ -1160,7 +1114,7 @@ FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) /// @returns /// The value a converted into Gamma2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) { return a * a * a * a; @@ -1177,7 +1131,7 @@ FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a) /// @returns /// The value a converted into linear. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) { return a * a * a * a * a * a * a * a; @@ -1194,7 +1148,7 @@ FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) { return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); @@ -1211,7 +1165,7 @@ FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) { FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46)); @@ -1230,7 +1184,7 @@ FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) { return ffxSqrt(ffxSqrt(a)); @@ -1247,7 +1201,7 @@ FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a) /// @returns /// The value a converted into PQ. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) { return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); @@ -1264,7 +1218,7 @@ FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) { FfxFloat32 b = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723)); @@ -1283,7 +1237,7 @@ FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) { return ffxSqrt(ffxSqrt(ffxSqrt(a))); @@ -1300,7 +1254,7 @@ FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a) /// @returns /// The value a converted into Gamma2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) { return a * a * a * a; @@ -1317,7 +1271,7 @@ FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a) /// @returns /// The value a converted into linear. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) { return a * a * a * a * a * a * a * a; @@ -1334,7 +1288,7 @@ FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) { return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); @@ -1351,7 +1305,7 @@ FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a) /// @returns /// The value a converted into PQ. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) { FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u)); @@ -1370,7 +1324,7 @@ FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) { return ffxSqrt(ffxSqrt(a)); @@ -1387,7 +1341,7 @@ FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) { return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); @@ -1404,7 +1358,7 @@ FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) { FfxFloat32x2 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u)); @@ -1423,7 +1377,7 @@ FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) { return ffxSqrt(ffxSqrt(ffxSqrt(a))); @@ -1440,7 +1394,7 @@ FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a) /// @returns /// The value a converted into Gamma2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) { return a * a * a * a; @@ -1457,7 +1411,7 @@ FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a) /// @returns /// The value a converted into linear. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) { return a * a * a * a * a * a * a * a; @@ -1474,7 +1428,7 @@ FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) { return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); @@ -1491,7 +1445,7 @@ FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a) { FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u)); @@ -1510,7 +1464,7 @@ FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a) { return ffxSqrt(ffxSqrt(a)); @@ -1527,7 +1481,7 @@ FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a) { return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); @@ -1544,7 +1498,7 @@ FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a) { FfxFloat32x3 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u)); @@ -1563,7 +1517,7 @@ FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a) /// @returns /// The value a converted into PQ. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a) { return ffxSqrt(ffxSqrt(ffxSqrt(a))); @@ -1580,7 +1534,7 @@ FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a) /// @returns /// The value a converted into Gamma2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a) { return a * a * a * a; @@ -1597,7 +1551,7 @@ FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a) /// @returns /// The value a converted into linear. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a) { return a * a * a * a * a * a * a * a; @@ -1614,7 +1568,7 @@ FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a) { return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); @@ -1631,7 +1585,7 @@ FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a) { FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u)); @@ -1650,7 +1604,7 @@ FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a) { return ffxSqrt(ffxSqrt(a)); @@ -1667,7 +1621,7 @@ FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a) /// @returns /// The value a converted into PQ. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a) { return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); @@ -1684,7 +1638,7 @@ FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a) { FfxFloat32x4 b = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u)); @@ -1703,7 +1657,7 @@ FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a) /// @returns /// The value a converted into PQ. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a) { return ffxSqrt(ffxSqrt(ffxSqrt(a))); @@ -1793,7 +1747,7 @@ FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y) { return min(x, y); @@ -1807,7 +1761,7 @@ FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y) { return min(x, y); @@ -1821,7 +1775,7 @@ FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y) { return min(x, y); @@ -1835,7 +1789,7 @@ FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y) { return min(x, y); @@ -1844,12 +1798,11 @@ FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y) /// Conditional free logic NOT operation using two values. /// /// @param [in] x The first value to be fed into the NOT operator. 
-/// @param [in] y The second value to be fed into the NOT operator. /// /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxZeroOneAnd(FfxUInt32 x) { return x ^ FfxUInt32(1); @@ -1858,12 +1811,11 @@ FfxUInt32 ffxZeroOneAnd(FfxUInt32 x) /// Conditional free logic NOT operation using two values. /// /// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. /// /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x) { return x ^ ffxBroadcast2(1u); @@ -1872,12 +1824,11 @@ FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x) /// Conditional free logic NOT operation using two values. /// /// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. /// /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x) { return x ^ ffxBroadcast3(1u); @@ -1886,12 +1837,11 @@ FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x) /// Conditional free logic NOT operation using two values. /// /// @param [in] x The first value to be fed into the NOT operator. -/// @param [in] y The second value to be fed into the NOT operator. /// /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x) { return x ^ ffxBroadcast4(1u); @@ -1905,7 +1855,7 @@ FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y) { return max(x, y); @@ -1919,7 +1869,7 @@ FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y) /// @returns /// Result of the OR operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y) { return max(x, y); @@ -1933,7 +1883,7 @@ FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y) { return max(x, y); @@ -1947,7 +1897,7 @@ FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y) { return max(x, y); @@ -1960,7 +1910,7 @@ FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x) { return FfxUInt32(FfxFloat32(1.0) - x); @@ -1973,7 +1923,7 @@ FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x) { return FfxUInt32x2(ffxBroadcast2(1.0) - x); @@ -1986,7 +1936,7 @@ FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x) { return FfxUInt32x3(ffxBroadcast3(1.0) - x); @@ -1999,7 +1949,7 @@ FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x) { return FfxUInt32x4(ffxBroadcast4(1.0) - x); @@ -2015,7 +1965,7 @@ FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x) /// @returns /// Result of the AND OR operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return ffxSaturate(x * y + z); @@ -2031,7 +1981,7 @@ FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return ffxSaturate(x * y + z); @@ -2047,7 +1997,7 @@ FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return ffxSaturate(x * y + z); @@ -2063,7 +2013,7 @@ FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return ffxSaturate(x * y + z); @@ -2076,7 +2026,7 @@ FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// Result of the greater than zero comparison. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x) { return ffxSaturate(x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT)); @@ -2089,7 +2039,7 @@ FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x) /// @returns /// Result of the greater than zero comparison. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x) { return ffxSaturate(x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT)); @@ -2102,7 +2052,7 @@ FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x) /// @returns /// Result of the greater than zero comparison. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x) { return ffxSaturate(x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT)); @@ -2115,7 +2065,7 @@ FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x) /// @returns /// Result of the greater than zero comparison. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x) { return ffxSaturate(x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT)); @@ -2128,7 +2078,7 @@ FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxZeroOneAnd(FfxFloat32 x) { return FfxFloat32(1.0) - x; @@ -2141,7 +2091,7 @@ FfxFloat32 ffxZeroOneAnd(FfxFloat32 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x) { return ffxBroadcast2(1.0) - x; @@ -2154,7 +2104,7 @@ FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x) { return ffxBroadcast3(1.0) - x; @@ -2167,7 +2117,7 @@ FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x) { return ffxBroadcast4(1.0) - x; @@ -2181,7 +2131,7 @@ FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y) { return max(x, y); @@ -2195,7 +2145,7 @@ FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y) /// @returns /// Result of the OR operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y) { return max(x, y); @@ -2209,7 +2159,7 @@ FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y) { return max(x, y); @@ -2223,7 +2173,7 @@ FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y) { return max(x, y); @@ -2238,7 +2188,7 @@ FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y) /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { FfxFloat32 r = (-x) * z + z; @@ -2254,7 +2204,7 @@ FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { FfxFloat32x2 r = (-x) * z + z; @@ -2270,7 +2220,7 @@ FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { FfxFloat32x3 r = (-x) * z + z; @@ -2286,7 +2236,7 @@ FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { FfxFloat32x4 r = (-x) * z + z; @@ -2300,7 +2250,7 @@ FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// Result of the sign value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) { return ffxSaturate(x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -2313,7 +2263,7 @@ FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x) /// @returns /// Result of the sign value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) { return ffxSaturate(x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -2326,7 +2276,7 @@ FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x) /// @returns /// Result of the sign value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) { return ffxSaturate(x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -2339,7 +2289,7 @@ FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x) /// @returns /// Result of the sign value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) { return ffxSaturate(x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT)); @@ -2358,7 +2308,7 @@ FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x) /// @returns /// The color in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) { FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); @@ -2379,7 +2329,7 @@ FfxFloat32 ffxRec709FromLinear(FfxFloat32 color) /// @returns /// The color in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) { FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); @@ -2400,7 +2350,7 @@ FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color) /// @returns /// The color in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) { FfxFloat32x3 j = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45); @@ -2408,6 +2358,51 @@ FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); } +/// Compute a linear value from a REC.709 value. 
+/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); +} + +/// Compute a linear value from a REC.709 value. +/// +/// @param [in] color The value to convert to linear from REC.709. +/// +/// @returns +/// A value in linear space. +/// +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +{ + FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); + FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); + return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); +} + /// Compute a gamma value from a linear value. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. @@ -2420,10 +2415,10 @@ FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color) /// @returns /// A value in gamma space. 
/// -/// @ingroup GPU -FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX) +/// @ingroup GPUCore +FfxFloat32 ffxGammaFromLinear(FfxFloat32 value, FfxFloat32 power) { - return pow(color, FfxFloat32(rcpX)); + return pow(value, FfxFloat32(power)); } /// Compute a gamma value from a linear value. @@ -2438,10 +2433,10 @@ FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX) /// @returns /// A value in gamma space. /// -/// @ingroup GPU -FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX) +/// @ingroup GPUCore +FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 value, FfxFloat32 power) { - return pow(color, ffxBroadcast2(rcpX)); + return pow(value, ffxBroadcast2(power)); } /// Compute a gamma value from a linear value. @@ -2456,238 +2451,193 @@ FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX) /// @returns /// A value in gamma space. /// -/// @ingroup GPU -FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 color, FfxFloat32 rcpX) +/// @ingroup GPUCore +FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 value, FfxFloat32 power) { - return pow(color, ffxBroadcast3(rcpX)); + return pow(value, ffxBroadcast3(power)); } -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. +/// Compute a linear value from a value in a gamma space. /// -/// @ingroup GPU -FfxFloat32 ffxPQToLinear(FfxFloat32 x) -{ - FfxFloat32 p = pow(x, FfxFloat32(0.159302)); - return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); -} - -/// Compute a PQ value from a linear value. +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] value The value to convert to PQ from linear. +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. 
/// /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 x) +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) { - FfxFloat32x2 p = pow(x, ffxBroadcast2(0.159302)); - return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); + return pow(color, FfxFloat32(power)); } -/// Compute a PQ value from a linear value. -/// -/// @param [in] value The value to convert to PQ from linear. -/// -/// @returns -/// A value in linear space. +/// Compute a linear value from a value in a gamma space. /// -/// @ingroup GPU -FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 x) -{ - FfxFloat32x3 p = pow(x, ffxBroadcast3(0.159302)); - return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); -} - -/// Compute a linear value from a SRGB value. +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] value The value to convert to linear from SRGB. +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. /// /// @returns -/// A value in SRGB space. +/// A value in linear space. /// -/// @ingroup GPU -FfxFloat32 ffxSrgbToLinear(FfxFloat32 color) +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) { - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y); + return pow(color, ffxBroadcast2(power)); } -/// Compute a linear value from a SRGB value. -/// -/// @param [in] value The value to convert to linear from SRGB. -/// -/// @returns -/// A value in SRGB space. +/// Compute a linear value from a value in a gamma space. 
/// -/// @ingroup GPU -FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 color) -{ - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy); -} - -/// Compute a linear value from a SRGB value. +/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] value The value to convert to linear from SRGB. +/// @param [in] color The value to convert to linear in gamma space. +/// @param [in] power The power value used for the gamma curve. /// /// @returns -/// A value in SRGB space. +/// A value in linear space. /// -/// @ingroup GPU -FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 color) +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) { - FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); - FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); - return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy); + return pow(color, ffxBroadcast3(power)); } -/// Compute a linear value from a REC.709 value. +/// Compute a PQ value from a linear value. /// -/// @param [in] color The value to convert to linear from REC.709. +/// @param [in] value The value to convert to PQ from linear. /// /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32 ffxLinearFromRec709(FfxFloat32 color) +/// @ingroup GPUCore +FfxFloat32 ffxPQFromLinear(FfxFloat32 value) { - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); + FfxFloat32 p = pow(value, FfxFloat32(0.159302)); + return pow((FfxFloat32(0.835938) + FfxFloat32(18.8516) * p) / (FfxFloat32(1.0) + FfxFloat32(18.6875) * p), FfxFloat32(78.8438)); } -/// Compute a linear value from a REC.709 value. 
+/// Compute a PQ value from a linear value. /// -/// @param [in] color The value to convert to linear from REC.709. +/// @param [in] value The value to convert to PQ from linear. /// /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color) +/// @ingroup GPUCore +FfxFloat32x2 ffxPQFromLinear(FfxFloat32x2 value) { - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.159302)); + return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438)); } -/// Compute a linear value from a REC.709 value. +/// Compute a PQ value from a linear value. /// -/// @param [in] color The value to convert to linear from REC.709. +/// @param [in] value The value to convert to PQ from linear. /// /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color) +/// @ingroup GPUCore +FfxFloat32x3 ffxPQFromLinear(FfxFloat32x3 value) { - FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); - FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.159302)); + return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438)); } -/// Compute a linear value from a value in a gamma space. +/// Compute a linear value from a value in a PQ space. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] color The value to convert to linear in gamma space. 
-/// @param [in] power The power value used for the gamma curve. +/// @param [in] value The value to convert to linear in PQ space. /// /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power) +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromPQ(FfxFloat32 value) { - return pow(color, FfxFloat32(power)); + FfxFloat32 p = pow(value, FfxFloat32(0.0126833)); + return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); } -/// Compute a linear value from a value in a gamma space. +/// Compute a linear value from a value in a PQ space. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. +/// @param [in] value The value to convert to linear in PQ space. /// /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power) +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 value) { - return pow(color, ffxBroadcast2(power)); + FfxFloat32x2 p = pow(value, ffxBroadcast2(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); } -/// Compute a linear value from a value in a gamma space. +/// Compute a linear value from a value in a PQ space. /// /// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. /// -/// @param [in] color The value to convert to linear in gamma space. -/// @param [in] power The power value used for the gamma curve. +/// @param [in] value The value to convert to linear in PQ space. /// /// @returns /// A value in linear space. 
/// -/// @ingroup GPU -FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power) +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 value) { - return pow(color, ffxBroadcast3(power)); + FfxFloat32x3 p = pow(value, ffxBroadcast3(0.0126833)); + return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); } -/// Compute a linear value from a value in a PQ space. -/// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. +/// Compute an SRGB value from a linear value. /// -/// @param [in] value The value to convert to linear in PQ space. +/// @param [in] value The value to convert to SRGB from linear. /// /// @returns -/// A value in linear space. +/// A value in SRGB space. /// -/// @ingroup GPU -FfxFloat32 ffxLinearFromPQ(FfxFloat32 x) +/// @ingroup GPUCore +FfxFloat32 ffxSrgbFromLinear(FfxFloat32 value) { - FfxFloat32 p = pow(x, FfxFloat32(0.0126833)); - return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739)); + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.x, value * j.y, pow(value, j.z) * k.x + k.y); } -/// Compute a linear value from a value in a PQ space. +/// Compute an SRGB value from a linear value. /// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in PQ space. +/// @param [in] value The value to convert to SRGB from linear. /// /// @returns -/// A value in linear space. +/// A value in SRGB space. 
/// -/// @ingroup GPU -FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 x) +/// @ingroup GPUCore +FfxFloat32x2 ffxSrgbFromLinear(FfxFloat32x2 value) { - FfxFloat32x2 p = pow(x, ffxBroadcast2(0.0126833)); - return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739)); + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xx, value * j.yy, pow(value, j.zz) * k.xx + k.yy); } -/// Compute a linear value from a value in a PQ space. +/// Compute an SRGB value from a linear value. /// -/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native. -/// -/// @param [in] value The value to convert to linear in PQ space. +/// @param [in] value The value to convert to SRGB from linear. /// /// @returns -/// A value in linear space. +/// A value in SRGB space. /// -/// @ingroup GPU -FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x) +/// @ingroup GPUCore +FfxFloat32x3 ffxSrgbFromLinear(FfxFloat32x3 value) { - FfxFloat32x3 p = pow(x, ffxBroadcast3(0.0126833)); - return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739)); + FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); + FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055); + return clamp(j.xxx, value * j.yyy, pow(value, j.zzz) * k.xxx + k.yyy); } /// Compute a linear value from a value in a SRGB space. @@ -2699,12 +2649,12 @@ FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x) /// @returns /// A value in linear space. 
/// -/// @ingroup GPU -FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color) +/// @ingroup GPUCore +FfxFloat32 ffxLinearFromSrgb(FfxFloat32 value) { - FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z)); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.x), value * j.y, pow(value * k.x + k.y, j.z)); } /// Compute a linear value from a value in a SRGB space. @@ -2716,12 +2666,12 @@ FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color) /// @returns /// A value in linear space. /// -/// @ingroup GPU -FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color) +/// @ingroup GPUCore +FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 value) { - FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz)); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xx), value * j.yy, pow(value * k.xx + k.yy, j.zz)); } /// Compute a linear value from a value in a SRGB space. @@ -2733,30 +2683,32 @@ FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color) /// @returns /// A value in linear space. 
/// -/// @ingroup GPU -FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 color) +/// @ingroup GPUCore +FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 value) { - FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat32x3 j = FfxFloat32x3(0.04045, 1.0 / 12.92, 2.4); FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055); - return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz)); + return ffxZeroOneSelect(ffxZeroOneIsSigned(value - j.xxx), value * j.yyy, pow(value * k.xxx + k.yyy, j.zzz)); } /// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. -/// -/// 543210 -/// ====== -/// ..xxx. -/// yy...y +/// +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y /// /// @param [in] a The input 1D coordinates to remap. /// /// @returns /// The remapped 2D coordinates. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) { - return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); + return FfxUInt32x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); } /// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. @@ -2777,8 +2729,8 @@ FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a) /// @returns /// The remapped 2D coordinates. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a) { - return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); + return FfxUInt32x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); } diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common_half.h similarity index 95% rename from thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h rename to thirdparty/amd-ffx/gpu/ffx_core_gpu_common_half.h index c46ccb36575d..6b071d590f5a 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h +++ b/thirdparty/amd-ffx/gpu/ffx_core_gpu_common_half.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -20,27 +21,27 @@ // THE SOFTWARE. 
#if FFX_HALF -#if FFX_HLSL_6_2 +#if FFX_HLSL_SM >= 62 /// A define value for 16bit positive infinity. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u) /// A define value for 16bit negative infinity. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u) #else /// A define value for 16bit positive infinity. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u) /// A define value for 16bit negative infinity. /// -/// @ingroup GPU +/// @ingroup GPUCore #define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u) -#endif // FFX_HLSL_6_2 +#endif // #if FFX_HLSL_SM>=62 /// Compute the min of two values. /// @@ -50,7 +51,7 @@ /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) { return min(x, y); @@ -64,7 +65,7 @@ FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) { return min(x, y); @@ -78,7 +79,7 @@ FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) { return min(x, y); @@ -92,7 +93,7 @@ FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) { return min(x, y); @@ -106,7 +107,7 @@ FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) { return min(x, y); @@ -120,7 +121,7 @@ FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y) /// @returns /// The the lowest of two values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) { return min(x, y); @@ -134,7 +135,7 @@ FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) { return min(x, y); @@ -148,7 +149,7 @@ FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) { return min(x, y); @@ -162,7 +163,7 @@ FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) { return min(x, y); @@ -176,7 +177,7 @@ FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) { return min(x, y); @@ -190,7 +191,7 @@ FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) { return min(x, y); @@ -204,7 +205,7 @@ FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) { return min(x, y); @@ -218,7 +219,7 @@ FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) { return max(x, y); @@ -232,7 +233,7 @@ FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y) /// @returns /// The the lowest of two values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) { return max(x, y); @@ -246,7 +247,7 @@ FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) { return max(x, y); @@ -260,7 +261,7 @@ FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) { return max(x, y); @@ -274,7 +275,7 @@ FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) { return max(x, y); @@ -288,7 +289,7 @@ FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) { return max(x, y); @@ -302,7 +303,7 @@ FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) { return max(x, y); @@ -316,7 +317,7 @@ FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) { return max(x, y); @@ -330,7 +331,7 @@ FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) { return max(x, y); @@ -344,7 +345,7 @@ FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y) /// @returns /// The the lowest of two values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) { return max(x, y); @@ -358,7 +359,7 @@ FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) { return max(x, y); @@ -372,7 +373,7 @@ FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y) /// @returns /// The the lowest of two values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) { return max(x, y); @@ -386,7 +387,7 @@ FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) { return pow(x, y); @@ -400,7 +401,7 @@ FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) { return pow(x, y); @@ -414,7 +415,7 @@ FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) { return pow(x, y); @@ -428,7 +429,7 @@ FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y) /// @returns /// The value of the first parameter raised to the power of the second. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) { return pow(x, y); @@ -441,7 +442,7 @@ FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y) /// @returns /// The the square root of x. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxSqrt(FfxFloat16 x) { return sqrt(x); @@ -454,7 +455,7 @@ FfxFloat16 ffxSqrt(FfxFloat16 x) /// @returns /// The the square root of x. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) { return sqrt(x); @@ -467,7 +468,7 @@ FfxFloat16x2 ffxSqrt(FfxFloat16x2 x) /// @returns /// The the square root of x. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) { return sqrt(x); @@ -480,7 +481,7 @@ FfxFloat16x3 ffxSqrt(FfxFloat16x3 x) /// @returns /// The the square root of x. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) { return sqrt(x); @@ -494,7 +495,7 @@ FfxFloat16x4 ffxSqrt(FfxFloat16x4 x) /// @returns /// The value of d with the sign bit from s. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) { return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u))); @@ -508,7 +509,7 @@ FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s) /// @returns /// The value of d with the sign bit from s. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) { return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u))); @@ -522,7 +523,7 @@ FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s) /// @returns /// The value of d with the sign bit from s. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) { return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u))); @@ -536,7 +537,7 @@ FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s) /// @returns /// The value of d with the sign bit from s. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) { return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u))); @@ -559,7 +560,7 @@ FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); @@ -582,7 +583,7 @@ FfxFloat16 ffxIsSignedHalf(FfxFloat16 m) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); @@ -605,7 +606,7 @@ FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); @@ -628,7 +629,7 @@ FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m) /// @returns /// 1.0 when the value is negative, or 0.0 when the value is 0 or position. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); @@ -646,7 +647,7 @@ FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); @@ -664,7 +665,7 @@ FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); @@ -682,7 +683,7 @@ FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); @@ -700,7 +701,7 @@ FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m) /// @returns /// 1.0 when the value is position, or 0.0 when the value is 0 or negative. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) { return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); @@ -720,7 +721,7 @@ FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m) /// @returns /// The sortable integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) { return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); @@ -737,7 +738,7 @@ FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x) /// @returns /// The floating point value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) { return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000)); @@ -757,7 +758,7 @@ FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x) /// @returns /// The sortable integer values. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) { return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); @@ -774,7 +775,7 @@ FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x) /// @returns /// The floating point values. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) { return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); @@ -790,7 +791,7 @@ FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) { return ((i.x) & 0xffu) | ((i.y << 16) & 0xff0000u); @@ -806,7 +807,7 @@ FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) { return ((i.x >> 8) & 0xffu) | ((i.y << 8) & 0xff0000u); @@ -822,7 +823,7 @@ FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) { return ((i.x >> 16) & 0xffu) | ((i.y) & 0xff0000u); @@ -838,7 +839,7 @@ FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) { return ((i.x >> 24) & 0xffu) | ((i.y >> 8) & 0xff0000u); @@ -854,7 +855,7 @@ FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) { return ((i.x) & 0x000000ffu) | (i.y & 0xffffff00u); @@ -870,7 +871,7 @@ FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) { return ((i.x >> 16) & 0x000000ffu) | (i.y & 0xffffff00u); @@ -886,7 +887,7 @@ FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i) /// @returns /// The packed integer value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) { return ((i.x << 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); @@ -902,7 +903,7 @@ FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) { return ((i.x >> 8) & 0x0000ff00u) | (i.y & 0xffff00ffu); @@ -918,7 +919,7 @@ FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) { return ((i.x << 16) & 0x00ff0000u) | (i.y & 0xff00ffffu); @@ -934,7 +935,7 @@ FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) { return ((i.x) & 0x00ff0000u) | (i.y & 0xff00ffffu); @@ -950,7 +951,7 @@ FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) { return ((i.x << 24) & 0xff000000u) | (i.y & 0x00ffffffu); @@ -966,7 +967,7 @@ FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) { return ((i.x << 8) & 0xff000000u) | (i.y & 0x00ffffffu); @@ -982,7 +983,7 @@ FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i) /// @returns /// The packed integer value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) { return ((i.x) & 0x00ff00ffu) | ((i.y << 8) & 0xff00ff00u); @@ -998,7 +999,7 @@ FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i) /// @returns /// The packed integer value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) { return (((i.x) & 0xffu) | ((i.x >> 8) & 0xff00u) | ((i.y << 16) & 0xff0000u) | ((i.y << 8) & 0xff000000u)); @@ -1012,7 +1013,7 @@ FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) { x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); @@ -1037,7 +1038,7 @@ FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); @@ -1061,7 +1062,7 @@ FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); @@ -1085,7 +1086,7 @@ FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); @@ -1109,7 +1110,7 @@ FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); @@ -1123,7 +1124,7 @@ FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); @@ -1136,7 +1137,7 @@ FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); @@ -1149,7 +1150,7 @@ FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); @@ -1162,7 +1163,7 @@ FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); @@ -1176,7 +1177,7 @@ FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The packed FfxUInt32x2 value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) { x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); @@ -1195,7 +1196,7 @@ FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); @@ -1213,7 +1214,7 @@ FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); @@ -1231,7 +1232,7 @@ FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); @@ -1249,7 +1250,7 @@ FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); @@ -1270,7 +1271,7 @@ FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) /// @returns /// The packed FfxUInt32x2 value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; @@ -1291,7 +1292,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; @@ -1312,7 +1313,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; @@ -1333,7 +1334,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x /// @returns /// The packed FfxUInt32x2 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i) { FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; @@ -1349,7 +1350,7 @@ FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x /// @returns /// The unpacked FfxFloat16x2. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1364,7 +1365,7 @@ FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1379,7 +1380,7 @@ FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1394,7 +1395,7 @@ FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1409,7 +1410,7 @@ FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1424,7 +1425,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1439,7 +1440,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1454,7 +1455,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// The unpacked FfxFloat16x2. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i) { return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); @@ -1473,7 +1474,7 @@ FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i) /// @returns /// An approximation of the square root, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a) { return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2)); @@ -1492,7 +1493,7 @@ FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a) /// @returns /// An approximation of the square root, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a) { return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2)); @@ -1511,7 +1512,7 @@ FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a) /// @returns /// An approximation of the square root, estimated to low quality. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a) { return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2)); @@ -1530,7 +1531,7 @@ FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a) { return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a)); @@ -1549,7 +1550,7 @@ FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a) { return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a)); @@ -1568,7 +1569,7 @@ FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a) { return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a)); @@ -1587,7 +1588,7 @@ FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a) /// @returns /// An approximation of the reciprocal, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a) { return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a)); @@ -1606,7 +1607,7 @@ FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a) { FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a)); @@ -1626,7 +1627,7 @@ FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a) { FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a)); @@ -1646,7 +1647,7 @@ FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a) { FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a)); @@ -1666,7 +1667,7 @@ FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a) /// @returns /// An approximation of the reciprocal, estimated to medium quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a) { FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a)); @@ -1686,7 +1687,7 @@ FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a) /// @returns /// An approximation of the reciprocal of the square root, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a) { return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1))); @@ -1705,7 +1706,7 @@ FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a) /// @returns /// An approximation of the reciprocal of the square root, estimated to low quality. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a) { return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1))); @@ -1724,7 +1725,7 @@ FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a) /// @returns /// An approximation of the reciprocal of the square root, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a) { return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1))); @@ -1743,7 +1744,7 @@ FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a) /// @returns /// An approximation of the reciprocal of the square root, estimated to low quality. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a) { return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1))); @@ -1833,7 +1834,7 @@ FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) { return min(x, y); @@ -1847,7 +1848,7 @@ FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) { return min(x, y); @@ -1861,7 +1862,7 @@ FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) { return min(x, y); @@ -1875,7 +1876,7 @@ FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y) /// @returns /// Result of the AND operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) { return min(x, y); @@ -1889,7 +1890,7 @@ FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y) /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) { return x ^ FFX_BROADCAST_UINT16(1); @@ -1903,7 +1904,7 @@ FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x) /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) { return x ^ FFX_BROADCAST_UINT16X2(1); @@ -1917,7 +1918,7 @@ FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x) /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) { return x ^ FFX_BROADCAST_UINT16X3(1); @@ -1931,7 +1932,7 @@ FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x) /// @returns /// Result of the NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) { return x ^ FFX_BROADCAST_UINT16X4(1); @@ -1945,7 +1946,7 @@ FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) { return max(x, y); @@ -1959,7 +1960,7 @@ FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) { return max(x, y); @@ -1973,7 +1974,7 @@ FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) { return max(x, y); @@ -1987,7 +1988,7 @@ FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y) /// @returns /// Result of the OR operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) { return max(x, y); @@ -2000,7 +2001,7 @@ FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y) /// @returns /// The converted Uint value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) { return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1))); @@ -2013,7 +2014,7 @@ FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x) /// @returns /// The converted Uint value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) { return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1))); @@ -2026,7 +2027,7 @@ FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x) /// @returns /// The converted Uint value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) { return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1))); @@ -2039,7 +2040,7 @@ FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x) /// @returns /// The converted Uint value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) { return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1))); @@ -2052,7 +2053,7 @@ FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x) /// @returns /// The converted half-precision FfxFloat32 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) { return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0))); @@ -2065,7 +2066,7 @@ FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x) /// @returns /// The converted half-precision FfxFloat32 value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) { return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); @@ -2078,7 +2079,7 @@ FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x) /// @returns /// The converted half-precision FfxFloat32 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) { return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); @@ -2091,7 +2092,7 @@ FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x) /// @returns /// The converted half-precision FfxFloat32 value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) { return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); @@ -2105,7 +2106,7 @@ FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) { return min(x, y); @@ -2119,7 +2120,7 @@ FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) { return min(x, y); @@ -2133,7 +2134,7 @@ FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// Result of the AND operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) { return min(x, y); @@ -2147,7 +2148,7 @@ FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y) /// @returns /// Result of the AND operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) { return min(x, y); @@ -2161,7 +2162,7 @@ FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y) /// @returns /// Result of the AND NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) { return (-x) * y + FFX_BROADCAST_FLOAT16(1.0); @@ -2175,7 +2176,7 @@ FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y) /// @returns /// Result of the AND NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) { return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0); @@ -2189,7 +2190,7 @@ FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// Result of the AND NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) { return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0); @@ -2203,7 +2204,7 @@ FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) /// @returns /// Result of the AND NOT operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) { return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0); @@ -2219,7 +2220,7 @@ FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) { return ffxSaturate(x * y + z); @@ -2235,7 +2236,7 @@ FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) /// @returns /// Result of the AND OR operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) { return ffxSaturate(x * y + z); @@ -2251,7 +2252,7 @@ FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) { return ffxSaturate(x * y + z); @@ -2267,7 +2268,7 @@ FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) { return ffxSaturate(x * y + z); @@ -2280,7 +2281,7 @@ FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) /// @returns /// Result of the greater than zero comparison. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); @@ -2293,7 +2294,7 @@ FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x) /// @returns /// Result of the greater than zero comparison. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); @@ -2306,7 +2307,7 @@ FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x) /// @returns /// Result of the greater than zero comparison. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); @@ -2319,7 +2320,7 @@ FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x) /// @returns /// Result of the greater than zero comparison. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); @@ -2332,7 +2333,7 @@ FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) { return FFX_BROADCAST_FLOAT16(1.0) - x; @@ -2345,7 +2346,7 @@ FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) { return FFX_BROADCAST_FLOAT16X2(1.0) - x; @@ -2358,7 +2359,7 @@ FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) { return FFX_BROADCAST_FLOAT16X3(1.0) - x; @@ -2371,7 +2372,7 @@ FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x) /// @returns /// Result of the AND OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) { return FFX_BROADCAST_FLOAT16X4(1.0) - x; @@ -2385,7 +2386,7 @@ FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) { return max(x, y); @@ -2399,7 +2400,7 @@ FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) { return max(x, y); @@ -2413,7 +2414,7 @@ FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y) /// @returns /// Result of the OR operation. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) { return max(x, y); @@ -2427,7 +2428,7 @@ FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y) /// @returns /// Result of the OR operation. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) { return max(x, y); @@ -2442,7 +2443,7 @@ FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y) /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) { FfxFloat16 r = (-x) * z + z; @@ -2458,7 +2459,7 @@ FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z) /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z) { FfxFloat16x2 r = (-x) * z + z; @@ -2474,7 +2475,7 @@ FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z) { FfxFloat16x3 r = (-x) * z + z; @@ -2490,7 +2491,7 @@ FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z /// @returns /// The selected value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z) { FfxFloat16x4 r = (-x) * z + z; @@ -2504,7 +2505,7 @@ FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z /// @returns /// Result of the sign value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF)); @@ -2517,7 +2518,7 @@ FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x) /// @returns /// Result of the sign value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF)); @@ -2530,7 +2531,7 @@ FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x) /// @returns /// Result of the sign value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF)); @@ -2543,7 +2544,7 @@ FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x) /// @returns /// Result of the sign value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) { return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF)); @@ -2562,7 +2563,7 @@ FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x) /// @returns /// The color in Rec.709 space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) { FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); @@ -2583,7 +2584,7 @@ FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c) /// @returns /// The color in Rec.709 space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) { FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); @@ -2604,7 +2605,7 @@ FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c) /// @returns /// The color in Rec.709 space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) { FfxFloat16x3 j = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45); @@ -2624,7 +2625,7 @@ FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c) /// @returns /// A value in gamma space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) { return pow(c, FFX_BROADCAST_FLOAT16(rcpX)); @@ -2642,7 +2643,7 @@ FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX) /// @returns /// A value in gamma space. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) { return pow(c, FFX_BROADCAST_FLOAT16X2(rcpX)); @@ -2660,7 +2661,7 @@ FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX) /// @returns /// A value in gamma space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) { return pow(c, FFX_BROADCAST_FLOAT16X3(rcpX)); @@ -2673,7 +2674,7 @@ FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX) /// @returns /// A value in SRGB space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) { FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); @@ -2688,7 +2689,7 @@ FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c) /// @returns /// A value in SRGB space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) { FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); @@ -2703,7 +2704,7 @@ FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c) /// @returns /// A value in SRGB space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) { FfxFloat16x3 j = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4); @@ -2718,7 +2719,7 @@ FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c) /// @returns /// A square root of the input value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) { return sqrt(c); @@ -2731,7 +2732,7 @@ FfxFloat16 ffxSquareRootHalf(FfxFloat16 c) /// @returns /// A square root of the input value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) { return sqrt(c); @@ -2744,7 +2745,7 @@ FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c) /// @returns /// A square root of the input value. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) { return sqrt(c); @@ -2757,7 +2758,7 @@ FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c) /// @returns /// A cube root of the input value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) { return pow(c, FFX_BROADCAST_FLOAT16(1.0 / 3.0)); @@ -2770,7 +2771,7 @@ FfxFloat16 ffxCubeRootHalf(FfxFloat16 c) /// @returns /// A cube root of the input value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) { return pow(c, FFX_BROADCAST_FLOAT16X2(1.0 / 3.0)); @@ -2783,7 +2784,7 @@ FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c) /// @returns /// A cube root of the input value. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) { return pow(c, FFX_BROADCAST_FLOAT16X3(1.0 / 3.0)); @@ -2796,7 +2797,7 @@ FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) { FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); @@ -2811,7 +2812,7 @@ FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) { FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); @@ -2826,7 +2827,7 @@ FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) { FfxFloat16x3 j = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45); @@ -2844,7 +2845,7 @@ FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c) /// @returns /// A value in linear space. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) { return pow(c, FFX_BROADCAST_FLOAT16(x)); @@ -2860,7 +2861,7 @@ FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) { return pow(c, FFX_BROADCAST_FLOAT16X2(x)); @@ -2876,7 +2877,7 @@ FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) { return pow(c, FFX_BROADCAST_FLOAT16X3(x)); @@ -2891,10 +2892,10 @@ FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) { - FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4); FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.x), c * j.y, pow(c * k.x + k.y, j.z)); } @@ -2908,10 +2909,10 @@ FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c) /// @returns /// A value in linear space. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) { - FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4); FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xx), c * j.yy, pow(c * k.xx + k.yy, j.zz)); } @@ -2925,30 +2926,32 @@ FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c) /// @returns /// A value in linear space. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c) { - FfxFloat16x3 j = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4); + FfxFloat16x3 j = FfxFloat16x3(0.04045, 1.0 / 12.92, 2.4); FfxFloat16x2 k = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055); return ffxZeroOneSelectHalf(ffxZeroOneIsSignedHalf(c - j.xxx), c * j.yyy, pow(c * k.xxx + k.yyy, j.zzz)); } /// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear. /// -/// 543210 -/// ====== -/// ..xxx. -/// yy...y +/// Remap illustration: +/// +/// 543210 +/// ~~~~~~ +/// ..xxx. +/// yy...y /// /// @param [in] a The input 1D coordinates to remap. /// /// @returns /// The remapped 2D coordinates. /// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) { - return FfxUInt16x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u)); + return FfxUInt16x2(ffxBitfieldExtract(a, 1u, 3u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), a, 1u)); } /// A helper function performing a remap 64x1 to 8x8 remapping which is necessary for 2D wave reductions. @@ -2969,10 +2972,10 @@ FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a) /// @returns /// The remapped 2D coordinates. 
/// -/// @ingroup GPU +/// @ingroup GPUCore FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a) { - return FfxUInt16x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u)); + return FfxUInt16x2(ffxBitfieldInsertMask(ffxBitfieldExtract(a, 2u, 3u), a, 1u), ffxBitfieldInsertMask(ffxBitfieldExtract(a, 3u, 3u), ffxBitfieldExtract(a, 1u, 2u), 2u)); } #endif // FFX_HALF diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h b/thirdparty/amd-ffx/gpu/ffx_core_hlsl.h similarity index 75% rename from thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h rename to thirdparty/amd-ffx/gpu/ffx_core_hlsl.h index ad4ff6552d16..28827d98652f 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h +++ b/thirdparty/amd-ffx/gpu/ffx_core_hlsl.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,191 +20,274 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
+/// @defgroup HLSLCore HLSL Core +/// HLSL core defines and functions +/// +/// @ingroup FfxHLSL + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +/// A define for abstracting select functionality for pre/post HLSL 21 +/// +/// @ingroup HLSLCore +#if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) select(cond, arg1, arg2) + +#else // #if __HLSL_VERSION >= 2021 + +#define FFX_SELECT(cond, arg1, arg2) cond ? arg1 : arg2 + +#endif // #if __HLSL_VERSION >= 2021 + /// A define for abstracting shared memory between shading languages. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_GROUPSHARED groupshared /// A define for abstracting compute memory barriers between shading languages. /// -/// @ingroup GPU -#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync +/// @ingroup HLSLCore +#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync() + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD(x, y) InterlockedAdd(x, y) + +/// A define for abstracting compute atomic additions between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_ADD_RETURN(x, y, r) InterlockedAdd(x, y, r) + +/// A define for abstracting compute atomic OR between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_OR(x, y) InterlockedOr(x, y) + +/// A define for abstracting compute atomic min between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MIN(x, y) InterlockedMin(x, y) + +/// A define for abstracting compute atomic max between shading languages. 
+/// +/// @ingroup HLSLCore +#define FFX_ATOMIC_MAX(x, y) InterlockedMax(x, y) /// A define added to accept static markup on functions to aid CPU/GPU portability of code. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_STATIC static /// A define for abstracting loop unrolling between shading languages. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_UNROLL [unroll] /// A define for abstracting a 'greater than' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_GREATER_THAN(x, y) x > y /// A define for abstracting a 'greater than or equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_GREATER_THAN_EQUAL(x, y) x >= y /// A define for abstracting a 'less than' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_LESS_THAN(x, y) x < y /// A define for abstracting a 'less than or equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_LESS_THAN_EQUAL(x, y) x <= y /// A define for abstracting an 'equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_EQUAL(x, y) x == y /// A define for abstracting a 'not equal' comparison operator between two types. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_NOT_EQUAL(x, y) x != y +/// A define for abstracting matrix multiply operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MATRIX_MULTIPLY(a, b) mul(a, b) + +/// A define for abstracting vector transformations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_TRANSFORM_VECTOR(a, b) mul(a, b) + +/// A define for abstracting modulo operations between shading languages. +/// +/// @ingroup HLSLCore +#define FFX_MODULO(a, b) (fmod(a, b)) + /// Broadcast a scalar value to a 1-dimensional floating point vector. 
/// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x) /// Broadcast a scalar value to a 2-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x) /// Broadcast a scalar value to a 3-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x) /// Broadcast a scalar value to a 4-dimensional floating point vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x) /// Broadcast a scalar value to a 1-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_UINT32(x) FfxUInt32(x) /// Broadcast a scalar value to a 2-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x) /// Broadcast a scalar value to a 4-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x) /// Broadcast a scalar value to a 4-dimensional unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x) /// Broadcast a scalar value to a 1-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_INT32(x) FfxInt32(x) /// Broadcast a scalar value to a 2-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_INT32X2(x) FfxInt32(x) /// Broadcast a scalar value to a 3-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_INT32X3(x) FfxInt32(x) /// Broadcast a scalar value to a 4-dimensional signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_INT32X4(x) FfxInt32(x) /// Broadcast a scalar value to a 1-dimensional half-precision floating point vector. 
/// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_FLOAT16(a) FFX_MIN16_F(a) /// Broadcast a scalar value to a 2-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a) /// Broadcast a scalar value to a 3-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a) /// Broadcast a scalar value to a 4-dimensional half-precision floating point vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a) /// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_UINT16(a) FFX_MIN16_U(a) /// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a) /// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a) /// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a) /// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_INT16(a) FFX_MIN16_I(a) /// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a) /// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector. 
/// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a) /// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector. /// -/// @ingroup GPU +/// @ingroup HLSLCore #define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a) -/// Pack 2x32-bit floating point values in a single 32bit value. +/// Convert FfxFloat32 to half (in lower 16-bits of output). +/// +/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf +/// +/// The function supports denormals. +/// +/// Some conversion rules are to make computations possibly "safer" on the GPU, +/// -INF & -NaN -> -65504 +/// +INF & +NaN -> +65504 +/// +/// @param [in] f The 32bit floating point value to convert. +/// +/// @returns +/// The closest 16bit floating point value to f. /// +/// @ingroup HLSLCore +#define ffxF32ToF16 f32tof16 + +/// Pack 2x32-bit floating point values in a single 32bit value. +/// /// This function first converts each component of value into their nearest 16-bit floating /// point representation, and then stores the X and Y components in the lower and upper 16 bits of the /// 32bit unsigned integer respectively. /// /// @param [in] value A 2-dimensional floating point value to convert and pack. -/// +/// /// @returns /// A packed 32bit value containing 2 16bit floating point values. -/// -/// @ingroup HLSL -FfxUInt32 packHalf2x16(FfxFloat32x2 value) +/// +/// @ingroup HLSLCore +FfxUInt32 ffxPackHalf2x16(FfxFloat32x2 value) { - return f32tof16(value.x) | (f32tof16(value.y) << 16); + return ffxF32ToF16(value.x) | (ffxF32ToF16(value.y) << 16); } /// Broadcast a scalar value to a 2-dimensional floating point vector. @@ -213,7 +297,7 @@ FfxUInt32 packHalf2x16(FfxFloat32x2 value) /// @returns /// A 2-dimensional floating point vector with value in each component. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) { return FfxFloat32x2(value, value); @@ -226,7 +310,7 @@ FfxFloat32x2 ffxBroadcast2(FfxFloat32 value) /// @returns /// A 3-dimensional floating point vector with value in each component. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) { return FfxFloat32x3(value, value, value); @@ -239,7 +323,7 @@ FfxFloat32x3 ffxBroadcast3(FfxFloat32 value) /// @returns /// A 4-dimensional floating point vector with value in each component. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) { return FfxFloat32x4(value, value, value, value); @@ -252,7 +336,7 @@ FfxFloat32x4 ffxBroadcast4(FfxFloat32 value) /// @returns /// A 2-dimensional signed integer vector with value in each component. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxInt32x2 ffxBroadcast2(FfxInt32 value) { return FfxInt32x2(value, value); @@ -265,10 +349,10 @@ FfxInt32x2 ffxBroadcast2(FfxInt32 value) /// @returns /// A 3-dimensional signed integer vector with value in each component. /// -/// @ingroup HLSL -FfxUInt32x3 ffxBroadcast3(FfxInt32 value) +/// @ingroup HLSLCore +FfxInt32x3 ffxBroadcast3(FfxInt32 value) { - return FfxUInt32x3(value, value, value); + return FfxInt32x3(value, value, value); } /// Broadcast a scalar value to a 4-dimensional signed integer vector. @@ -278,7 +362,7 @@ FfxUInt32x3 ffxBroadcast3(FfxInt32 value) /// @returns /// A 4-dimensional signed integer vector with value in each component. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxInt32x4 ffxBroadcast4(FfxInt32 value) { return FfxInt32x4(value, value, value, value); @@ -291,7 +375,7 @@ FfxInt32x4 ffxBroadcast4(FfxInt32 value) /// @returns /// A 2-dimensional unsigned integer vector with value in each component. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) { return FfxUInt32x2(value, value); @@ -304,7 +388,7 @@ FfxUInt32x2 ffxBroadcast2(FfxUInt32 value) /// @returns /// A 3-dimensional unsigned integer vector with value in each component. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) { return FfxUInt32x3(value, value, value); @@ -317,24 +401,24 @@ FfxUInt32x3 ffxBroadcast3(FfxUInt32 value) /// @returns /// A 4-dimensional unsigned integer vector with value in each component. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x4 ffxBroadcast4(FfxUInt32 value) { return FfxUInt32x4(value, value, value, value); } -FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) +FfxUInt32 ffxBitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits) { FfxUInt32 mask = (1u << bits) - 1; return (src >> off) & mask; } -FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) +FfxUInt32 ffxBitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask) { return (ins & mask) | (src & (~mask)); } -FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) +FfxUInt32 ffxBitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) { FfxUInt32 mask = (1u << bits) - 1; return (ins & mask) | (src & (~mask)); @@ -342,12 +426,12 @@ FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32 ffxAsUInt32(FfxFloat32 x) { return asuint(x); @@ -355,12 +439,12 @@ FfxUInt32 ffxAsUInt32(FfxFloat32 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. 
/// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) { return asuint(x); @@ -368,12 +452,12 @@ FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) { return asuint(x); @@ -381,12 +465,12 @@ FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x) /// Interprets the bit pattern of x as an unsigned integer. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as an unsigned integer. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) { return asuint(x); @@ -394,12 +478,12 @@ FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxAsFloat(FfxUInt32 x) { return asfloat(x); @@ -407,12 +491,12 @@ FfxFloat32 ffxAsFloat(FfxUInt32 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) { return asfloat(x); @@ -420,12 +504,12 @@ FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) { return asfloat(x); @@ -433,17 +517,121 @@ FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x) /// Interprets the bit pattern of x as a floating-point number. /// -/// @param [in] value The input value. +/// @param [in] x The input value. /// /// @returns /// The input interpreted as a floating-point number. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) { return asfloat(x); } +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxReciprocal(FfxFloat32 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxReciprocal(FfxFloat32x2 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxReciprocal(FfxFloat32x3 x) +{ + return rcp(x); +} + +/// Compute the inverse of a value. +/// +/// @param [in] x The value to calculate the inverse of. +/// +/// @returns +/// The inverse of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxReciprocal(FfxFloat32x4 x) +{ + return rcp(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRsqrt(FfxFloat32 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x.
+/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRsqrt(FfxFloat32x2 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRsqrt(FfxFloat32x3 x) +{ + return rsqrt(x); +} + +/// Compute the inverse square root of a value. +/// +/// @param [in] x The value to calculate the inverse square root of. +/// +/// @returns +/// The inverse square root of x. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRsqrt(FfxFloat32x4 x) +{ + return rsqrt(x); +} + /// Compute the linear interopation between two values. /// /// Implemented by calling the HLSL mix instrinsic function. Implements the @@ -458,7 +646,7 @@ FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) { return lerp(x, y, t); @@ -478,7 +666,7 @@ FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) { return lerp(x, y, t); @@ -498,7 +686,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) { return lerp(x, y, t); @@ -518,7 +706,7 @@ FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t) /// @returns /// A linearly interpolated value between x and y according to t.
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) { return lerp(x, y, t); @@ -538,7 +726,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) { return lerp(x, y, t); @@ -558,7 +746,7 @@ FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) { return lerp(x, y, t); @@ -578,7 +766,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t) /// @returns /// A linearly interpolated value between x and y according to t. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) { return lerp(x, y, t); @@ -591,7 +779,7 @@ FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t) /// @returns /// The clamped version of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxSaturate(FfxFloat32 x) { return saturate(x); @@ -604,7 +792,7 @@ FfxFloat32 ffxSaturate(FfxFloat32 x) /// @returns /// The clamped version of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) { return saturate(x); @@ -617,7 +805,7 @@ FfxFloat32x2 ffxSaturate(FfxFloat32x2 x) /// @returns /// The clamped version of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) { return saturate(x); @@ -630,7 +818,7 @@ FfxFloat32x3 ffxSaturate(FfxFloat32x3 x) /// @returns /// The clamped version of x. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) { return saturate(x); @@ -645,11 +833,11 @@ FfxFloat32x4 ffxSaturate(FfxFloat32x4 x) /// function. /// /// @param [in] x The value to compute the fractional part from. -/// +/// /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxFract(FfxFloat32 x) { return x - floor(x); @@ -668,7 +856,7 @@ FfxFloat32 ffxFract(FfxFloat32 x) /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxFract(FfxFloat32x2 x) { return x - floor(x); @@ -687,7 +875,7 @@ FfxFloat32x2 ffxFract(FfxFloat32x2 x) /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxFract(FfxFloat32x3 x) { return x - floor(x); @@ -698,7 +886,7 @@ FfxFloat32x3 ffxFract(FfxFloat32x3 x) /// This function calculates x - floor(x). Where floor is the intrinsic HLSL function. /// /// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. It is -/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic +/// worth further noting that this function is intentionally distinct from the HLSL frac intrinsic /// function. /// /// @param [in] x The value to compute the fractional part from. @@ -706,24 +894,76 @@ FfxFloat32x3 ffxFract(FfxFloat32x3 x) /// @returns /// The fractional part of x. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxFract(FfxFloat32x4 x) { return x - floor(x); } +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from two integers. +/// +/// @ingroup HLSLCore +FfxFloat32 ffxRound(FfxFloat32 x) +{ + return round(x); +} + +/// Rounds to the nearest integer.
In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from two integers. +/// +/// @ingroup HLSLCore +FfxFloat32x2 ffxRound(FfxFloat32x2 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from two integers. +/// +/// @ingroup HLSLCore +FfxFloat32x3 ffxRound(FfxFloat32x3 x) +{ + return round(x); +} + +/// Rounds to the nearest integer. In case the fractional part is 0.5, it will round to the nearest even integer. +/// +/// @param [in] x The value to be rounded. +/// +/// @returns +/// The nearest integer from x. The nearest even integer from x if equidistant from two integers. +/// +/// @ingroup HLSLCore +FfxFloat32x4 ffxRound(FfxFloat32x4 x) +{ + return round(x); +} + /// Compute the maximum of three values. /// /// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// +/// /// @param [in] x The first value to include in the max calculation. /// @param [in] y The second value to include in the max calcuation. /// @param [in] z The third value to include in the max calcuation. -/// +/// /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return max(x, max(y, z)); @@ -732,7 +972,7 @@ FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// Compute the maximum of three values. /// /// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// +/// /// @param [in] x The first value to include in the max calculation.
/// @param [in] y The second value to include in the max calcuation. /// @param [in] z The third value to include in the max calcuation. @@ -740,7 +980,7 @@ FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return max(x, max(y, z)); @@ -749,7 +989,7 @@ FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// Compute the maximum of three values. /// /// NOTE: This function should compile down to a single V_MAX3_F32 operation on GCN/RDNA hardware. -/// +/// /// @param [in] x The first value to include in the max calculation. /// @param [in] y The second value to include in the max calcuation. /// @param [in] z The third value to include in the max calcuation. @@ -757,7 +997,7 @@ FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return max(x, max(y, z)); @@ -774,7 +1014,7 @@ FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return max(x, max(y, z)); @@ -791,7 +1031,7 @@ FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) { return max(x, max(y, z)); @@ -808,7 +1048,7 @@ FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) /// @returns /// The maximum value of x, y, and z. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) { return max(x, max(y, z)); @@ -825,7 +1065,7 @@ FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) { return max(x, max(y, z)); @@ -842,7 +1082,7 @@ FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) /// @returns /// The maximum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) { return max(x, max(y, z)); @@ -859,7 +1099,7 @@ FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return max(min(x, y), min(max(x, y), z)); @@ -876,7 +1116,7 @@ FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return max(min(x, y), min(max(x, y), z)); @@ -893,7 +1133,7 @@ FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return max(min(x, y), min(max(x, y), z)); @@ -910,7 +1150,7 @@ FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The median value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return max(min(x, y), min(max(x, y), z)); @@ -990,7 +1230,7 @@ FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) /// Compute the minimum of three values. 
/// -/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -999,7 +1239,7 @@ FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) { return min(x, min(y, z)); @@ -1007,7 +1247,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -1016,7 +1256,7 @@ FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) { return min(x, min(y, z)); @@ -1024,7 +1264,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_I32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -1033,7 +1273,7 @@ FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z) /// @returns /// The minimum value of x, y, and z. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) { return min(x, min(y, z)); @@ -1041,7 +1281,7 @@ FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -1050,7 +1290,7 @@ FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) { return min(x, min(y, z)); @@ -1058,7 +1298,7 @@ FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -1067,7 +1307,7 @@ FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) { return min(x, min(y, z)); @@ -1075,7 +1315,7 @@ FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. 
/// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -1084,7 +1324,7 @@ FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z) /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) { return min(x, min(y, z)); @@ -1092,16 +1332,16 @@ FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. -/// @param [in] y The second value to include in the min calcuation. -/// @param [in] z The third value to include in the min calcuation. +/// @param [in] y The second value to include in the min calculation. +/// @param [in] z The third value to include in the min calculation. /// /// @returns /// The minimum value of x, y, and z. /// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) { return min(x, min(y, z)); @@ -1109,7 +1349,7 @@ FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) /// Compute the minimum of three values. /// -/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. +/// NOTE: This function should compile down to a single V_MIN3_F32 operation on GCN/RDNA hardware. /// /// @param [in] x The first value to include in the min calculation. /// @param [in] y The second value to include in the min calcuation. @@ -1118,23 +1358,38 @@ FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z) /// @returns /// The minimum value of x, y, and z. 
/// -/// @ingroup HLSL +/// @ingroup HLSLCore FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z) { return min(x, min(y, z)); } -FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) +FfxUInt32 ffxAShrSU1(FfxUInt32 a, FfxUInt32 b) { return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); } +FfxUInt32 ffxPackF32(FfxFloat32x2 v){ + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +FfxFloat32x2 ffxUnpackF32(FfxUInt32 a){ + return f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16)); +} + +FfxUInt32x2 ffxPackF32x2(FfxFloat32x4 v){ + return FfxUInt32x2(ffxPackF32(v.xy), ffxPackF32(v.zw)); +} + +FfxFloat32x4 ffxUnpackF32x2(FfxUInt32x2 a){ + return FfxFloat32x4(ffxUnpackF32(a.x), ffxUnpackF32(a.y)); +} + //============================================================================================================================== // HLSL HALF //============================================================================================================================== -#if FFX_HALF - //============================================================================================================================== // Need to use manual unpack to get optimal execution (don't use packed types in buffers directly). // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ @@ -1156,14 +1411,71 @@ FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) { return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y)); } + +FfxUInt32x2 ffxFloat16x4ToUint32x2(FFX_MIN16_F4 v) +{ + FfxUInt32x2 result; + result.x = ffxF32ToF16(v.x) | (ffxF32ToF16(v.y) << 16); + result.y = ffxF32ToF16(v.z) | (ffxF32ToF16(v.w) << 16); + return result; +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. 
+FfxFloat32 ffxInvertSafe(FfxFloat32 v){ + FfxFloat32 s = FfxFloat32(sign(v)); + FfxFloat32 s2 = s*s; + return s2/(v + s2 - 1.0); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x2 ffxInvertSafe(FfxFloat32x2 v){ + FfxFloat32x2 s = FfxFloat32x2(sign(v)); + FfxFloat32x2 s2 = s*s; + return s2/(v + s2 - FfxFloat32x2(1.0, 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. +FfxFloat32x3 ffxInvertSafe(FfxFloat32x3 v){ + FfxFloat32x3 s = FfxFloat32x3(sign(v)); + FfxFloat32x3 s2 = s*s; + return s2/(v + s2 - FfxFloat32x3(1.0, 1.0, 1.0)); +} + +/// @brief Inverts the value while avoiding division by zero. If the value is zero, zero is returned. +/// @param v Value to invert. +/// @return If v = 0 returns 0. If v != 0 returns 1/v. 
+FfxFloat32x4 ffxInvertSafe(FfxFloat32x4 v){ + FfxFloat32x4 s = FfxFloat32x4(sign(v)); + FfxFloat32x4 s2 = s*s; + return s2/(v + s2 - FfxFloat32x4(1.0, 1.0, 1.0, 1.0)); +} + #define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) +#if FFX_HALF + #define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x)) #define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x)) #define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x)) + +FfxUInt32 ffxPackF16(FfxFloat16x2 v){ + FfxUInt32x2 p = FfxUInt32x2(ffxF32ToF16(FfxFloat32x2(v).x), ffxF32ToF16(FfxFloat32x2(v).y)); + return p.x | (p.y << 16); +} + +FfxFloat16x2 ffxUnpackF16(FfxUInt32 a){ + return FfxFloat16x2(f16tof32(FfxUInt32x2(a & 0xFFFF, a >> 16))); +} + //------------------------------------------------------------------------------------------------------------------------------ FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) { - return f32tof16(x.x) + (f32tof16(x.y) << 16); + return ffxF32ToF16(x.x) + (ffxF32ToF16(x.y) << 16); } FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) { @@ -1182,19 +1494,19 @@ FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) #define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x)) #define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x)) -#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) #define FFX_TO_UINT16(x) asuint16(x) #define FFX_TO_UINT16X2(x) asuint16(x) #define FFX_TO_UINT16X3(x) asuint16(x) #define FFX_TO_UINT16X4(x) asuint16(x) #else -#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a))) +#define FFX_TO_UINT16(a) FFX_MIN16_U(ffxF32ToF16(FfxFloat32(a))) #define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y)) #define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z)) #define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), 
FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w)) -#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) -#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) +#if (FFX_HLSL_SM >= 62) && !defined(FFX_NO_16_BIT_CAST) #define FFX_TO_FLOAT16(x) asfloat16(x) #define FFX_TO_FLOAT16X2(x) asfloat16(x) #define FFX_TO_FLOAT16X3(x) asfloat16(x) @@ -1204,7 +1516,7 @@ FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) #define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y)) #define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z)) #define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w)) -#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST) +#endif // #if (FFX_HLSL_SM>=62) && !defined(FFX_NO_16_BIT_CAST) //============================================================================================================================== #define FFX_BROADCAST_FLOAT16(a) FFX_MIN16_F(a) @@ -1448,38 +1760,122 @@ FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b) //============================================================================================================================== #if defined(FFX_WAVE) // Where 'x' must be a compile time literal. 
-FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x) +FfxFloat32 ffxWaveXorF1(FfxFloat32 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) +FfxFloat32x2 ffxWaveXorF2(FfxFloat32x2 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) +FfxFloat32x3 ffxWaveXorF3(FfxFloat32x3 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) +FfxFloat32x4 ffxWaveXorF4(FfxFloat32x4 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) +FfxUInt32 ffxWaveXorU1(FfxUInt32 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) +FfxUInt32x2 ffxWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) +FfxUInt32x3 ffxWaveXorU1(FfxUInt32x3 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } -FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) +FfxUInt32x4 ffxWaveXorU1(FfxUInt32x4 v, FfxUInt32 x) { return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x); } +FfxBoolean ffxWaveIsFirstLane() +{ + return WaveIsFirstLane(); +} +FfxUInt32 ffxWaveLaneIndex() +{ + return WaveGetLaneIndex(); +} +FfxBoolean ffxWaveReadAtLaneIndexB1(FfxBoolean v, FfxUInt32 x) +{ + return WaveReadLaneAt(v, x); +} +FfxUInt32 ffxWavePrefixCountBits(FfxBoolean v) +{ + return WavePrefixCountBits(v); +} +FfxUInt32 ffxWaveActiveCountBits(FfxBoolean v) +{ + return WaveActiveCountBits(v); +} +FfxUInt32 ffxWaveReadLaneFirstU1(FfxUInt32 v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32x2 ffxWaveReadLaneFirstU2(FfxUInt32x2 v) +{ + return WaveReadLaneFirst(v); +} +FfxBoolean ffxWaveReadLaneFirstB1(FfxBoolean v) +{ + return WaveReadLaneFirst(v); +} +FfxUInt32 ffxWaveOr(FfxUInt32 a) +{ + return 
WaveActiveBitOr(a); +} +FfxUInt32 ffxWaveMin(FfxUInt32 a) +{ + return WaveActiveMin(a); +} +FfxFloat32 ffxWaveMin(FfxFloat32 a) +{ + return WaveActiveMin(a); +} +FfxUInt32 ffxWaveMax(FfxUInt32 a) +{ + return WaveActiveMax(a); +} +FfxFloat32 ffxWaveMax(FfxFloat32 a) +{ + return WaveActiveMax(a); +} +FfxUInt32 ffxWaveSum(FfxUInt32 a) +{ + return WaveActiveSum(a); +} +FfxFloat32 ffxWaveSum(FfxFloat32 a) +{ + return WaveActiveSum(a); +} +FfxUInt32 ffxWaveLaneCount() +{ + return WaveGetLaneCount(); +} +FfxBoolean ffxWaveAllTrue(FfxBoolean v) +{ + return WaveActiveAllTrue(v); +} +FfxFloat32 ffxQuadReadX(FfxFloat32 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32x2 ffxQuadReadX(FfxFloat32x2 v) +{ + return QuadReadAcrossX(v); +} +FfxFloat32 ffxQuadReadY(FfxFloat32 v) +{ + return QuadReadAcrossY(v); +} +FfxFloat32x2 ffxQuadReadY(FfxFloat32x2 v) +{ + return QuadReadAcrossY(v); +} #if FFX_HALF FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) @@ -1496,7 +1892,7 @@ FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) } FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) { - return AW4_FFX_UINT32(WaveReadLaneAt(FFX_UINT32_AW4(v), WaveGetLaneIndex() ^ x)); + return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x)); } #endif // FFX_HALF #endif // #if defined(FFX_WAVE) diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_portability.h b/thirdparty/amd-ffx/gpu/ffx_core_portability.h similarity index 61% rename from thirdparty/amd-fsr2/shaders/ffx_core_portability.h rename to thirdparty/amd-ffx/gpu/ffx_core_portability.h index 45be05973a84..12147b9a7bfe 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_core_portability.h +++ b/thirdparty/amd-ffx/gpu/ffx_core_portability.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,32 +20,27 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +void ffxOpAAddOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) { d = a + ffxBroadcast3(b); - return d; } -FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) +void ffxOpACpyF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) { d = a; - return d; } -FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) +void ffxOpAMulF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) { d = a * b; - return d; } -FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) +void ffxOpAMulOneF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) { - d = a * ffxBroadcast3(b); - return d; + d = a * b; } -FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) +void ffxOpARcpF3(FFX_PARAMETER_OUT FfxFloat32x3 d, FfxFloat32x3 a) { - d = rcp(a); - return d; + d = ffxReciprocal(a); } diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation.h 
b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation.h new file mode 100644 index 000000000000..ccf2e23b87f7 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation.h @@ -0,0 +1,187 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_FRAMEINTERPOLATION_H +#define FFX_FRAMEINTERPOLATION_H + +struct InterpolationSourceColor +{ + FfxFloat32x3 fRaw; + FfxFloat32x3 fLinear; + FfxFloat32 fBilinearWeightSum; +}; + +InterpolationSourceColor NewInterpolationSourceColor() +{ + InterpolationSourceColor c; + c.fRaw = FfxFloat32x3(0.0, 0.0, 0.0); + c.fLinear = FfxFloat32x3(0.0, 0.0, 0.0); + c.fBilinearWeightSum = 0.0; + return c; +} + +InterpolationSourceColor SampleTextureBilinear(FfxBoolean isCurrent, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxInt32x2 texSize) +{ + InterpolationSourceColor result = NewInterpolationSourceColor(); + + FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, texSize); + + FfxFloat32x3 fColor = FfxFloat32x3(0.0, 0.0, 0.0); + FfxFloat32 fWeightSum = 0.0f; + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset; + + if (IsInRect(iSamplePos, InterpolationRectBase(), InterpolationRectSize())) + { + FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (isCurrent) + fColor += LoadCurrentBackbuffer(iSamplePos).rgb * fWeight; + else + fColor += LoadPreviousBackbuffer(iSamplePos).rgb * fWeight; + fWeightSum += fWeight; + } + } + + //normalize colors + fColor = (fWeightSum != 0.0f) ? 
fColor / fWeightSum : FfxFloat32x3(0.0f, 0.0f, 0.0f); + + result.fRaw = fColor; + result.fLinear = RawRGBToLinear(fColor); + result.fBilinearWeightSum = fWeightSum; + + return result; +} + +void updateInPaintingWeight(inout FfxFloat32 fInPaintingWeight, FfxFloat32 fFactor) +{ + fInPaintingWeight = ffxSaturate(ffxMax(fInPaintingWeight, fFactor)); +} + +void computeInterpolatedColor(FfxUInt32x2 iPxPos, out FfxFloat32x3 fInterpolatedColor, inout FfxFloat32 fInPaintingWeight) +{ + const FfxFloat32x2 fUvInInterpolationRect = (FfxFloat32x2(iPxPos - InterpolationRectBase()) + 0.5f) / InterpolationRectSize(); + const FfxFloat32x2 fUvInScreenSpace = (FfxFloat32x2(iPxPos) + 0.5f) / DisplaySize(); + const FfxFloat32x2 fLrUvInInterpolationRect = fUvInInterpolationRect * (FfxFloat32x2(RenderSize()) / GetMaxRenderSize()); + + const FfxFloat32x2 fUvLetterBoxScale = FfxFloat32x2(InterpolationRectSize()) / DisplaySize(); + + // game MV are top left aligned, the function scales them to render res UV + VectorFieldEntry gameMv; + LoadInpaintedGameFieldMv(fUvInInterpolationRect, gameMv); + + // OF is done on the back buffers which already have black bars + VectorFieldEntry ofMv; + SampleOpticalFlowMotionVectorField(fUvInScreenSpace, ofMv); + + // Binarize disucclusion factor + FfxFloat32x2 fDisocclusionFactor = FfxFloat32x2(FFX_EQUAL(ffxSaturate(SampleDisocclusionMask(fLrUvInInterpolationRect).xy), FfxFloat32x2(1.0, 1.0))); + + InterpolationSourceColor fPrevColorGame = SampleTextureBilinear(false, fUvInScreenSpace, +gameMv.fMotionVector * fUvLetterBoxScale, DisplaySize()); // Get in previous frame buffer, the color of interpolated pixel + InterpolationSourceColor fCurrColorGame = SampleTextureBilinear(true, fUvInScreenSpace, -gameMv.fMotionVector * fUvLetterBoxScale, DisplaySize()); // Get color in current framebuffer, of color of interpolated pixel + + InterpolationSourceColor fPrevColorOF = SampleTextureBilinear(false, fUvInScreenSpace, +ofMv.fMotionVector * fUvLetterBoxScale, 
DisplaySize()); + InterpolationSourceColor fCurrColorOF = SampleTextureBilinear(true, fUvInScreenSpace, -ofMv.fMotionVector * fUvLetterBoxScale, DisplaySize()); + + FfxFloat32 fDisoccludedFactor = 0.0f; + + // Disocclusion logic + { + fDisocclusionFactor.x *= FfxFloat32(!gameMv.bPosOutside); // fDisocclusionFactor.x of 1 means the pos of interpolated pixel is within bounds of previous frame. + fDisocclusionFactor.y *= FfxFloat32(!gameMv.bNegOutside); // fDisocclusionFactor.y of 1 means the pos of interpolated pixel is within bounds of current frame + + // Inpaint in bi-directional disocclusion areas + updateInPaintingWeight(fInPaintingWeight, FfxFloat32(length(fDisocclusionFactor) <= FFX_FRAMEINTERPOLATION_EPSILON)); + + FfxFloat32 t = 0.5f; + t += 0.5f * (1 - (fDisocclusionFactor.x)); + t -= 0.5f * (1 - (fDisocclusionFactor.y)); + // Say if fDisocclusionFactor.x is 1 and fDisocclusionFactor.y = 0, then t will be 0. fInterpolatedColor will be entirely from fPrevColorGame + fInterpolatedColor = ffxLerp(fPrevColorGame.fRaw, fCurrColorGame.fRaw, ffxSaturate(t)); + fDisoccludedFactor = ffxSaturate(1 - ffxMin(fDisocclusionFactor.x, fDisocclusionFactor.y)); + + if (fPrevColorGame.fBilinearWeightSum == 0.0f) + { + fInterpolatedColor = fCurrColorGame.fRaw; + } + else if (fCurrColorGame.fBilinearWeightSum == 0.0f) + { + fInterpolatedColor = fPrevColorGame.fRaw; + } + if (fPrevColorGame.fBilinearWeightSum == 0 && fCurrColorGame.fBilinearWeightSum == 0) + { + fInPaintingWeight = 1.0f; + } + } + + { + + FfxFloat32 ofT = 0.5f; + + if (fPrevColorOF.fBilinearWeightSum > 0 && fCurrColorOF.fBilinearWeightSum > 0) + { + ofT = 0.5f; + } + else if (fPrevColorOF.fBilinearWeightSum > 0) + { + ofT = 0; + } else { + ofT = 1; + } + + const FfxFloat32x3 ofColor = ffxLerp(fPrevColorOF.fRaw, fCurrColorOF.fRaw, ofT); + + FfxFloat32 fOF_Sim = NormalizedDot3(fPrevColorOF.fRaw, fCurrColorOF.fRaw); + FfxFloat32 fGame_Sim = NormalizedDot3(fPrevColorGame.fRaw, fCurrColorGame.fRaw); + + fGame_Sim = 
ffxLerp(ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, fGame_Sim), 1.0f, ffxSaturate(fDisoccludedFactor)); + FfxFloat32 fGameMvBias = ffxPow(ffxSaturate(fGame_Sim / ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, fOF_Sim)), 1.0f); + + const FfxFloat32 fFrameIndexFactor = FfxFloat32(FrameIndexSinceLastReset() < 10); + fGameMvBias = ffxLerp(fGameMvBias, 1.0f, fFrameIndexFactor); + + fInterpolatedColor = ffxLerp(ofColor, fInterpolatedColor, ffxSaturate(fGameMvBias)); + } +} + +void computeFrameinterpolation(FfxInt32x2 iPxPos) +{ + FfxFloat32x3 fColor = FfxFloat32x3(0, 0, 0); + FfxFloat32 fInPaintingWeight = 0.0f; + + if (IsInRect(iPxPos, InterpolationRectBase(), InterpolationRectSize()) == false || FrameIndexSinceLastReset() == 0) + { + // if we just reset or we are out of the interpolation rect, copy the current back buffer and don't interpolate + fColor = LoadCurrentBackbuffer(iPxPos); + } + else + { + computeInterpolatedColor(iPxPos, fColor, fInPaintingWeight); + } + + StoreFrameinterpolationOutput(FfxInt32x2(iPxPos), FfxFloat32x4(fColor, fInPaintingWeight)); +} + +#endif // FFX_FRAMEINTERPOLATION_H diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h new file mode 100644 index 000000000000..20c5c296728f --- /dev/null +++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h @@ -0,0 +1,758 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_frameinterpolation_resources.h" +#include "ffx_core.h" + +#define COUNTER_SPD 0 +#define COUNTER_FRAME_INDEX_SINCE_LAST_RESET 1 + + /////////////////////////////////////////////// + // declare CBs and CB accessors +/////////////////////////////////////////////// +#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION, std140) uniform cbFI_t + { + FfxInt32x2 renderSize; + FfxInt32x2 displaySize; + + FfxFloat32x2 displaySizeRcp; + FfxFloat32 cameraNear; + FfxFloat32 cameraFar; + + FfxInt32x2 upscalerTargetSize; + FfxInt32 Mode; + FfxInt32 reset; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32 deltaTime; + FfxInt32 HUDLessAttachedFactor; + FfxInt32x2 distortionFieldSize; + + FfxFloat32x2 opticalFlowScale; + FfxInt32 opticalFlowBlockSize; + FfxUInt32 dispatchFlags; + + FfxInt32x2 maxRenderSize; + FfxInt32 opticalFlowHalfResMode; + FfxInt32 NumInstances; + + FfxInt32x2 interpolationRectBase; + FfxInt32x2 interpolationRectSize; + + FfxFloat32x3 debugBarColor; + FfxUInt32 backBufferTransferFunction; + + FfxFloat32x2 minMaxLuminance; + FfxFloat32 fTanHalfFOV; + FfxInt32 _pad1; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fMotionVectorScale; + } cbFI; + + FfxFloat32x2 Jitter() + { + return cbFI.fJitter; + } + + FfxInt32x2 InterpolationRectBase() + { + return cbFI.interpolationRectBase; + } + + FfxInt32x2 InterpolationRectSize() + { + return cbFI.interpolationRectSize; + } + + FfxFloat32x2 MotionVectorScale() + { + return cbFI.fMotionVectorScale; + } + + FfxInt32x2 RenderSize() + { + return cbFI.renderSize; + } + + FfxInt32x2 DisplaySize() + { + return cbFI.displaySize; + } + + FfxBoolean Reset() + { + return cbFI.reset == 1; + } + + FfxFloat32x4 DeviceToViewSpaceTransformFactors() + { + return cbFI.fDeviceToViewDepth; + } + + FfxInt32x2 GetOpticalFlowSize() + { + FfxInt32x2 iOpticalFlowSize = FfxInt32x2((1.0 / cbFI.opticalFlowScale) / 
FfxFloat32x2(cbFI.opticalFlowBlockSize.xx)); + + return iOpticalFlowSize; + } + + FfxInt32x2 GetOpticalFlowSize2() + { + return GetOpticalFlowSize() * 1; + } + + FfxFloat32x2 GetOpticalFlowScale() + { + return cbFI.opticalFlowScale; + } + + FfxInt32 GetOpticalFlowBlockSize() + { + return cbFI.opticalFlowBlockSize; + } + + FfxInt32 GetHUDLessAttachedFactor() + { + return cbFI.HUDLessAttachedFactor; + } + + FfxInt32x2 GetDistortionFieldSize() + { + return cbFI.distortionFieldSize; + } + + FfxUInt32 GetDispatchFlags() + { + return cbFI.dispatchFlags; + } + + FfxInt32x2 GetMaxRenderSize() + { + return cbFI.maxRenderSize; + } + + FfxInt32 GetOpticalFlowHalfResMode() + { + return cbFI.opticalFlowHalfResMode; + } + + FfxFloat32x3 GetDebugBarColor() + { + return cbFI.debugBarColor; + } + + FfxFloat32 TanHalfFoV() + { + return cbFI.fTanHalfFOV; + } + + FfxUInt32 BackBufferTransferFunction() + { + return cbFI.backBufferTransferFunction; + } + + FfxFloat32 MinLuminance() + { + return cbFI.minMaxLuminance[0]; + } + + FfxFloat32 MaxLuminance() + { + return cbFI.minMaxLuminance[1]; + } + +#endif // defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + + +#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID) + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID, std140) uniform cbInpaintingPyramid_t + { + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + } cbInpaintingPyramid; + + FfxUInt32 NumMips() + { + return cbInpaintingPyramid.mips; + } + FfxUInt32 NumWorkGroups() + { + return cbInpaintingPyramid.numWorkGroups; + } + FfxUInt32x2 WorkGroupOffset() + { + return cbInpaintingPyramid.workGroupOffset; + } + +#endif // defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID) + + + /////////////////////////////////////////////// + // declare samplers +/////////////////////////////////////////////// + + +layout (set = 0, binding = 1000) uniform sampler s_LinearClamp; + + 
/////////////////////////////////////////////// + // declare SRVs and SRV accessors +/////////////////////////////////////////////// + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE) uniform texture2D r_previous_interpolation_source; + + FfxFloat32x3 LoadPreviousBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_previous_interpolation_source, iPxPos, 0).rgb; + } + FfxFloat32x3 SamplePreviousBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return textureLod(sampler2D(r_previous_interpolation_source, s_LinearClamp), fUv, 0.0).xyz; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE) uniform texture2D r_current_interpolation_source; + + FfxFloat32x3 LoadCurrentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_current_interpolation_source, iPxPos, 0).rgb; + } + FfxFloat32x3 SampleCurrentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return textureLod(sampler2D(r_current_interpolation_source, s_LinearClamp), fUv, 0.0).xyz; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; + + FfxFloat32x2 LoadDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_dilated_motion_vectors, iPxPos, 0).xy; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilated_depth; + + FfxFloat32 LoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_dilated_depth, iPxPos, 0).x; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME + layout (set = 0, binding = 
FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME) uniform utexture2D r_reconstructed_depth_previous_frame; + + FfxFloat32 LoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return ffxAsFloat(texelFetch(r_reconstructed_depth_previous_frame, iPxInput, 0).x); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME) uniform utexture2D r_reconstructed_depth_interpolated_frame; + + FfxFloat32 LoadEstimatedInterpolationFrameDepth(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return ffxAsFloat(texelFetch(r_reconstructed_depth_interpolated_frame, iPxInput, 0).x); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK) uniform texture2D r_disocclusion_mask; + + FfxFloat32x4 LoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_disocclusion_mask, iPxPos, 0); + } + FfxFloat32x4 SampleDisocclusionMask(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return textureLod(sampler2D(r_disocclusion_mask, s_LinearClamp), fUv, 0); + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y) + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) uniform utexture2D r_game_motion_vector_field_x; + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y) uniform utexture2D r_game_motion_vector_field_y; + + FfxUInt32x2 LoadGameFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample) + { + FfxUInt32 packedX = texelFetch(r_game_motion_vector_field_x, iPxSample, 0).x; + FfxUInt32 packedY = texelFetch(r_game_motion_vector_field_y, iPxSample, 0).x; + + return FfxUInt32x2(packedX, packedY); + } +#endif + +#if 
defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) uniform utexture2D r_optical_flow_motion_vector_field_x; + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) uniform utexture2D r_optical_flow_motion_vector_field_y; + + FfxUInt32x2 LoadOpticalFlowFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample) + { + FfxUInt32 packedX = texelFetch(r_optical_flow_motion_vector_field_x, iPxSample, 0).x; + FfxUInt32 packedY = texelFetch(r_optical_flow_motion_vector_field_y, iPxSample, 0).x; + + return FfxUInt32x2(packedX, packedY); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW) uniform itexture2D r_optical_flow; + + #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + FfxFloat32x2 LoadOpticalFlow(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_optical_flow, iPxPos, 0).xy * GetOpticalFlowScale(); + } + #endif +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED) uniform texture2D r_optical_flow_upsampled; + + FfxFloat32x2 LoadOpticalFlowUpsampled(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_optical_flow_upsampled, iPxPos, 0).xy; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE) uniform utexture2D r_optical_flow_confidence; + + FfxFloat32 LoadOpticalFlowConfidence(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_optical_flow_confidence, iPxPos, 0).y; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION + layout (set = 0, binding = 
FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION) uniform utexture2D r_optical_flow_global_motion; + + FfxUInt32 LoadOpticalFlowGlobalMotion(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_optical_flow_global_motion, iPxPos, 0).x; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION) uniform utexture2D r_optical_flow_scd; + + FfxUInt32 LoadOpticalFlowSceneChangeDetection(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_optical_flow_scd, iPxPos, 0).x; + } + + FfxBoolean HasSceneChanged() + { + #define SCD_OUTPUT_HISTORY_BITS_SLOT 1 + //if (FrameIndex() <= 5) // threshold according to original OpenCL code + //{ + // return 1.0; + //} + //else + { + // Report that the scene is changed if the change was detected in any of the + // 4 previous frames (0xfu - covers 4 history bits). + return ((texelFetch(r_optical_flow_scd, FfxInt32x2(SCD_OUTPUT_HISTORY_BITS_SLOT, 0), 0).x) & 0xfu) != 0; + } + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG) uniform texture2D r_optical_flow_debug; + + FfxFloat32x4 LoadOpticalFlowDebug(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return texelFetch(r_optical_flow_debug, iPxPos, 0); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT) uniform texture2D r_output; + + FfxFloat32x4 LoadFrameInterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return texelFetch(r_output, iPxInput, 0); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID) uniform texture2D r_inpainting_pyramid; + + FfxFloat32x4 LoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32 mipLevel, FFX_PARAMETER_IN FfxUInt32x2 iPxInput) + { + 
return texelFetch(r_inpainting_pyramid, FfxInt32x2(iPxInput), mipLevel); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER) uniform texture2D r_present_backbuffer; + + FfxFloat32x4 LoadPresentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return texelFetch(r_present_backbuffer, iPxInput, 0); + } + FfxFloat32x4 SamplePresentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return textureLod(sampler2D(r_present_backbuffer, s_LinearClamp), fUv, 0.0); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS) readonly buffer FrameInterpolationCounters_t + { + FfxUInt32 data[]; + } r_counters; + + FfxUInt32 LoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos) + { + return r_counters.data[iPxPos]; + } + + FfxUInt32 FrameIndexSinceLastReset() + { + return LoadCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET); + } +#endif + + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH) + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; + + FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) + { + return texelFetch(r_input_depth, iPxPos, 0).x; + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS) + layout (set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; + + FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) + { + FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + + #if FFX_FRAMEINTERPOLATION_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); + #endif + + return fUvMotionVector; + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) + layout(set = 0, binding = 
FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) uniform texture2D r_input_distortion_field; + FfxFloat32x2 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return textureLod(sampler2D(r_input_distortion_field, s_LinearClamp), fUv, 0.0).xy; + } +#endif + +/////////////////////////////////////////////// +// declare UAVs and UAV accessors +/////////////////////////////////////////////// +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT /* app controlled format */) uniform image2D rw_output; + + FfxFloat32x4 RWLoadFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_output, iPxPos); + } + + void StoreFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 val) + { + imageStore(rw_output, iPxPos, val); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) uniform image2D rw_dilated_motion_vectors; + + FfxFloat32x2 RWLoadDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_dilated_motion_vectors, iPxPos).xy; + } + + void StoreDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val) + { + imageStore(rw_dilated_motion_vectors, iPxPos, FfxFloat32x4(val, 0.0, 0.0)); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH, r32f) uniform image2D rw_dilated_depth; + + FfxFloat32 RWLoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_dilated_depth, iPxPos).x; + } + + void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 val) + { + imageStore(rw_dilated_depth, iPxPos, FfxFloat32x4(val, 0.0, 0.0, 0.0)); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME + layout(set = 0, binding 
= FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME, r32ui) uniform uimage2D rw_reconstructed_depth_previous_frame; + + FfxFloat32 RWLoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return ffxAsFloat(imageLoad(rw_reconstructed_depth_previous_frame, iPxPos).x); + } + + void UpdateReconstructedDepthPreviousFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth) + { + FfxUInt32 uDepth = ffxAsUInt32(fDepth); + +#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH + imageAtomicMax(rw_reconstructed_depth_previous_frame, iPxSample, uDepth); +#else + imageAtomicMin(rw_reconstructed_depth_previous_frame, iPxSample, uDepth); // min for standard, max for inverted depth +#endif + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME, r32ui) uniform uimage2D rw_reconstructed_depth_interpolated_frame; + + FfxFloat32 RWLoadReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return ffxAsFloat(imageLoad(rw_reconstructed_depth_interpolated_frame, iPxPos).x); + } + + void StoreReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 value) + { + FfxUInt32 uDepth = ffxAsUInt32(value); + imageStore(rw_reconstructed_depth_interpolated_frame, iPxPos, FfxUInt32x4(uDepth, 0, 0, 0)); + } + + void UpdateReconstructedDepthInterpolatedFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth) + { + FfxUInt32 uDepth = ffxAsUInt32(fDepth); + +#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH + imageAtomicMax(rw_reconstructed_depth_interpolated_frame, iPxSample, uDepth); +#else + imageAtomicMin(rw_reconstructed_depth_interpolated_frame, iPxSample, uDepth); // min for standard, max for inverted depth +#endif + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK, rg8) uniform image2D 
rw_disocclusion_mask; + + FfxFloat32x2 RWLoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_disocclusion_mask, iPxPos).xy; + } + + void StoreDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val) + { + imageStore(rw_disocclusion_mask, iPxPos, FfxFloat32x4(val, 0.0, 0.0)); + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y) + + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X, r32ui) uniform uimage2D rw_game_motion_vector_field_x; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y, r32ui) uniform uimage2D rw_game_motion_vector_field_y; + + FfxUInt32 RWLoadGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_game_motion_vector_field_x, iPxPos).x; + } + + void StoreGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + imageStore(rw_game_motion_vector_field_x, iPxPos, FfxUInt32x4(val, 0, 0, 0)); + } + + FfxUInt32 RWLoadGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_game_motion_vector_field_y, iPxPos).x; + } + + void StoreGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + imageStore(rw_game_motion_vector_field_y, iPxPos, FfxUInt32x4(val, 0, 0, 0)); + } + + void UpdateGameMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector) + { + imageAtomicMax(rw_game_motion_vector_field_x, iPxPos, packedVector.x); + imageAtomicMax(rw_game_motion_vector_field_y, iPxPos, packedVector.y); + } + + FfxUInt32 UpdateGameMotionVectorFieldEx(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector) + { + FfxUInt32 uPreviousValueX = imageAtomicMax(rw_game_motion_vector_field_x, iPxPos, packedVector.x); + FfxUInt32 uPreviousValueY = 
imageAtomicMax(rw_game_motion_vector_field_y, iPxPos, packedVector.y); + + const FfxUInt32 uExistingVectorFieldEntry = ffxMax(uPreviousValueX, uPreviousValueY); + + return uExistingVectorFieldEntry; + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) + + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X, r32ui) uniform uimage2D rw_optical_flow_motion_vector_field_x; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y, r32ui) uniform uimage2D rw_optical_flow_motion_vector_field_y; + + FfxUInt32 RWLoadOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_optical_flow_motion_vector_field_x, iPxPos).x; + } + void StoreOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + imageStore(rw_optical_flow_motion_vector_field_x, iPxPos, FfxUInt32x4(val, 0, 0, 0)); + } + FfxUInt32 RWLoadOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return imageLoad(rw_optical_flow_motion_vector_field_y, iPxPos).x; + } + void StoreOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + imageStore(rw_optical_flow_motion_vector_field_y, iPxPos, FfxUInt32x4(val, 0, 0, 0)); + } + void UpdateOpticalflowMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector) + { + imageAtomicMax(rw_optical_flow_motion_vector_field_x, iPxPos, packedVector.x); + imageAtomicMax(rw_optical_flow_motion_vector_field_y, iPxPos, packedVector.y); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS) coherent buffer FrameInterpolationRWCounters_t + { + FfxUInt32 data[]; + } rw_counters; + + FfxUInt32 RWLoadCounter(FFX_PARAMETER_IN FfxInt32 
iPxPos) + { + return rw_counters.data[iPxPos]; + } + + void StoreCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_IN FfxUInt32 counter) + { + rw_counters.data[iPxPos] = counter; + } + void AtomicIncreaseCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_OUT FfxUInt32 oldVal) + { + oldVal = atomicAdd(rw_counters.data[iPxPos], 1); + } +#endif + + +#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12) + + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0, rgba16f) uniform image2D rw_inpainting_pyramid0; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1, rgba16f) uniform image2D rw_inpainting_pyramid1; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2, rgba16f) uniform image2D rw_inpainting_pyramid2; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3, rgba16f) uniform image2D rw_inpainting_pyramid3; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4, rgba16f) 
uniform image2D rw_inpainting_pyramid4; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5, rgba16f) coherent uniform image2D rw_inpainting_pyramid5; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6, rgba16f) uniform image2D rw_inpainting_pyramid6; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7, rgba16f) uniform image2D rw_inpainting_pyramid7; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8, rgba16f) uniform image2D rw_inpainting_pyramid8; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9, rgba16f) uniform image2D rw_inpainting_pyramid9; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10, rgba16f) uniform image2D rw_inpainting_pyramid10; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11, rgba16f) uniform image2D rw_inpainting_pyramid11; + layout(set = 0, binding = FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12, rgba16f) uniform image2D rw_inpainting_pyramid12; + + + FfxFloat32x4 RWLoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) + { + #define LOAD(idx) \ + if (index == idx) \ + { \ + return imageLoad(rw_inpainting_pyramid##idx, iPxPos); \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + LOAD(6); + LOAD(7); + LOAD(8); + LOAD(9); + LOAD(10); + LOAD(11); + LOAD(12); + return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); + + #undef LOAD + } + + void StoreInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 outValue, FFX_PARAMETER_IN FfxUInt32 index) + { + #define STORE(idx) \ + if (index == idx) \ + { \ + imageStore(rw_inpainting_pyramid##idx, iPxPos, outValue); \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + STORE(6); + STORE(7); + STORE(8); + STORE(9); + STORE(10); + 
STORE(11); + STORE(12); + + #undef STORE + } +#endif diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h new file mode 100644 index 000000000000..a58743d2f60c --- /dev/null +++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_callbacks_hlsl.h @@ -0,0 +1,814 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_frameinterpolation_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) + +#define COUNTER_SPD 0 +#define COUNTER_FRAME_INDEX_SINCE_LAST_RESET 1 + + /////////////////////////////////////////////// + // declare CBs and CB accessors +/////////////////////////////////////////////// +#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + cbuffer cbFI : FFX_DECLARE_CB(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + { + FfxInt32x2 renderSize; + FfxInt32x2 displaySize; + + FfxFloat32x2 displaySizeRcp; + FfxFloat32 cameraNear; + FfxFloat32 cameraFar; + + FfxInt32x2 upscalerTargetSize; + FfxInt32 Mode; + FfxInt32 reset; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32 deltaTime; + FfxInt32 HUDLessAttachedFactor; + FfxInt32x2 distortionFieldSize; + + FfxFloat32x2 opticalFlowScale; + FfxInt32 opticalFlowBlockSize; + FfxUInt32 dispatchFlags; + + FfxInt32x2 maxRenderSize; + FfxInt32 opticalFlowHalfResMode; + FfxInt32 NumInstances; + + FfxInt32x2 interpolationRectBase; + FfxInt32x2 interpolationRectSize; + + FfxFloat32x3 debugBarColor; + FfxUInt32 backBufferTransferFunction; + + FfxFloat32x2 minMaxLuminance; + FfxFloat32 fTanHalfFOV; + FfxInt32 _pad1; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fMotionVectorScale; + } + + const FfxFloat32x2 Jitter() + { + return fJitter; + } + + const FfxFloat32x2 MotionVectorScale() + { + return fMotionVectorScale; + } + + const FfxInt32x2 InterpolationRectBase() + { + return interpolationRectBase; + } + + const FfxInt32x2 InterpolationRectSize() + { + return interpolationRectSize; + } + + const FfxInt32x2 RenderSize() + { + return renderSize; + } + + const FfxInt32x2 DisplaySize() + { + return displaySize; + } + + const FfxBoolean 
Reset() + { + return reset == 1; + } + + FfxFloat32x4 DeviceToViewSpaceTransformFactors() + { + return fDeviceToViewDepth; + } + + FfxInt32x2 GetOpticalFlowSize() + { + FfxInt32x2 iOpticalFlowSize = (1.0f / opticalFlowScale) / FfxFloat32x2(opticalFlowBlockSize.xx); + + return iOpticalFlowSize; + } + + FfxInt32x2 GetOpticalFlowSize2() + { + return GetOpticalFlowSize() * 1; + } + + FfxFloat32x2 GetOpticalFlowScale() + { + return opticalFlowScale; + } + + FfxInt32 GetOpticalFlowBlockSize() + { + return opticalFlowBlockSize; + } + + FfxInt32 GetHUDLessAttachedFactor() + { + return HUDLessAttachedFactor; + } + + FfxInt32x2 GetDistortionFieldSize() + { + return distortionFieldSize; + } + + FfxUInt32 GetDispatchFlags() + { + return dispatchFlags; + } + + FfxInt32x2 GetMaxRenderSize() + { + return maxRenderSize; + } + + FfxInt32 GetOpticalFlowHalfResMode() + { + return opticalFlowHalfResMode; + } + + FfxFloat32x3 GetDebugBarColor() + { + return debugBarColor; + } + + FfxFloat32 TanHalfFoV() + { + return fTanHalfFOV; + } + + FfxUInt32 BackBufferTransferFunction() + { + return backBufferTransferFunction; + } + + FfxFloat32 MinLuminance() + { + return minMaxLuminance[0]; + } + + FfxFloat32 MaxLuminance() + { + return minMaxLuminance[1]; + } + +#endif // #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + +#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID) + cbuffer cbInpaintingPyramid : FFX_DECLARE_CB(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID) + { + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + } + + FfxUInt32 NumMips() + { + return mips; + } + FfxUInt32 NumWorkGroups() + { + return numWorkGroups; + } + FfxUInt32x2 WorkGroupOffset() + { + return workGroupOffset; + } +#endif // #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID) + +#define FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(p) FFX_FRAMEINTERPOLATION_ROOTSIG_STR(p) +#define FFX_FRAMEINTERPOLATION_ROOTSIG_STR(p) #p +#define 
FFX_FRAMEINTERPOLATION_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FRAMEINTERPOLATION_INPAINTING_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FRAMEINTERPOLATION_ROOTSIG_STRINGIFY(FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#if defined(FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG) +#define FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG_CONTENT FFX_FRAMEINTERPOLATION_ROOTSIG +#define FFX_FRAMEINTERPOLATION_EMBED_INPAINTING_ROOTSIG_CONTENT FFX_FRAMEINTERPOLATION_INPAINTING_ROOTSIG +#else +#define FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG_CONTENT +#define FFX_FRAMEINTERPOLATION_EMBED_INPAINTING_ROOTSIG_CONTENT +#endif // #if FFX_FRAMEINTERPOLATION_EMBED_ROOTSIG + +/////////////////////////////////////////////// +// declare samplers +/////////////////////////////////////////////// + +SamplerState s_LinearClamp : register(s0); + +/////////////////////////////////////////////// +// declare SRVs and SRV accessors 
+/////////////////////////////////////////////// + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE + Texture2D r_previous_interpolation_source : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE); + + FfxFloat32x3 LoadPreviousBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_previous_interpolation_source[iPxPos].rgb; + } + FfxFloat32x3 SamplePreviousBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return r_previous_interpolation_source.SampleLevel(s_LinearClamp, fUv, 0).xyz; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE + Texture2D r_current_interpolation_source : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE); + + FfxFloat32x3 LoadCurrentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_current_interpolation_source[iPxPos].rgb; + } + FfxFloat32x3 SampleCurrentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return r_current_interpolation_source.SampleLevel(s_LinearClamp, fUv, 0).xyz; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS + Texture2D r_dilated_motion_vectors : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS); + + FfxFloat32x2 LoadDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_dilated_motion_vectors[iPxPos].xy; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH + Texture2D r_dilated_depth : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH); + + FfxFloat32 LoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_dilated_depth[iPxPos].x; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME + Texture2D r_reconstructed_depth_previous_frame : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME); + + FfxFloat32 LoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return 
asfloat(r_reconstructed_depth_previous_frame[iPxInput]); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME + Texture2D r_reconstructed_depth_interpolated_frame : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME); + + FfxFloat32 LoadEstimatedInterpolationFrameDepth(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return asfloat(r_reconstructed_depth_interpolated_frame[iPxInput]); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK + Texture2D r_disocclusion_mask : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK); + + FfxFloat32x4 LoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_disocclusion_mask[iPxPos]; + } + FfxFloat32x4 SampleDisocclusionMask(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return r_disocclusion_mask.SampleLevel(s_LinearClamp, fUv, 0); + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y) + Texture2D r_game_motion_vector_field_x : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X); + Texture2D r_game_motion_vector_field_y : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y); + + FfxUInt32x2 LoadGameFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample) + { + FfxUInt32 packedX = r_game_motion_vector_field_x[iPxSample]; + FfxUInt32 packedY = r_game_motion_vector_field_y[iPxSample]; + + return FfxUInt32x2(packedX, packedY); + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) + Texture2D r_optical_flow_motion_vector_field_x : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X); + Texture2D r_optical_flow_motion_vector_field_y : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y); + + 
FfxUInt32x2 LoadOpticalFlowFieldMv(FFX_PARAMETER_IN FfxInt32x2 iPxSample) + { + FfxUInt32 packedX = r_optical_flow_motion_vector_field_x[iPxSample]; + FfxUInt32 packedY = r_optical_flow_motion_vector_field_y[iPxSample]; + + return FfxUInt32x2(packedX, packedY); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW + Texture2D r_optical_flow : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW); + + #if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) + FfxFloat32x2 LoadOpticalFlow(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_optical_flow[iPxPos] * GetOpticalFlowScale(); + } + #endif +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED + Texture2D r_optical_flow_upsampled : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_UPSAMPLED); + + FfxFloat32x2 LoadOpticalFlowUpsampled(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_optical_flow_upsampled[iPxPos]; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE + Texture2D r_optical_flow_confidence : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE); + + FfxFloat32 LoadOpticalFlowConfidence(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_optical_flow_confidence[iPxPos].y; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION + Texture2D r_optical_flow_global_motion : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION); + + FfxUInt32 LoadOpticalFlowGlobalMotion(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_optical_flow_global_motion[iPxPos]; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION + Texture2D r_optical_flow_scd : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION); + + FfxUInt32 LoadOpticalFlowSceneChangeDetection(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_optical_flow_scd[iPxPos]; + } + + FfxBoolean HasSceneChanged() + { + #define 
SCD_OUTPUT_HISTORY_BITS_SLOT 1 + //if (FrameIndex() <= 5) // threshold according to original OpenCL code + //{ + // return 1.0; + //} + //else + { + // Report that the scene is changed if the change was detected in any of the + // 4 previous frames (0xfu - covers 4 history bits). + return (r_optical_flow_scd[FfxInt32x2(SCD_OUTPUT_HISTORY_BITS_SLOT, 0)] & 0xfu) != 0; + } + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG + Texture2D r_optical_flow_debug : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_DEBUG); + + FfxFloat32x4 LoadOpticalFlowDebug(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return r_optical_flow_debug[iPxPos]; + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_MASK) && defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT) + Texture2D r_output : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT); + Texture2D r_inpainting_mask : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_MASK); + + FfxFloat32x4 LoadFrameInterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return FfxFloat32x4(r_output[iPxInput], r_inpainting_mask[iPxInput]); + } +#elif defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT) + Texture2D r_output : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT); + FfxFloat32x4 LoadFrameInterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + return r_output[iPxInput]; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID + Texture2D r_inpainting_pyramid : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID); + + FfxFloat32x4 LoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32 mipLevel, FFX_PARAMETER_IN FfxUInt32x2 iPxInput) + { + return r_inpainting_pyramid.mips[mipLevel][iPxInput]; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER + Texture2D r_present_backbuffer : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER); + + FfxFloat32x4 LoadPresentBackbuffer(FFX_PARAMETER_IN FfxInt32x2 iPxInput) + { + 
return r_present_backbuffer[iPxInput]; + } + FfxFloat32x4 SamplePresentBackbuffer(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return r_present_backbuffer.SampleLevel(s_LinearClamp, fUv, 0); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS + StructuredBuffer r_counters : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS); + + FfxUInt32 LoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos) + { + return r_counters[iPxPos]; + } + + const FfxUInt32 FrameIndexSinceLastReset() + { + return LoadCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET); + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH) +Texture2D r_input_depth : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH); +FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) +{ + return r_input_depth[iPxPos]; +} +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS) +Texture2D r_input_motion_vectors : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS); +FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) +{ + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + +#if FFX_FRAMEINTERPOLATION_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); +#endif + + return fUvMotionVector; +} +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD) + Texture2D r_input_distortion_field : FFX_DECLARE_SRV(FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD); + FfxFloat32x2 SampleDistortionField(FFX_PARAMETER_IN FfxFloat32x2 fUv) + { + return r_input_distortion_field.SampleLevel(s_LinearClamp, fUv, 0); + } +#endif + +/////////////////////////////////////////////// +// declare UAVs and UAV accessors +/////////////////////////////////////////////// +#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_MASK) && defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT) + RWTexture2D rw_output : 
FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT); + RWTexture2D rw_inpainting_mask : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_MASK); + + FfxFloat32x4 RWLoadFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return FfxFloat32x4(rw_output[iPxPos], rw_inpainting_mask[iPxPos]); + } + + void StoreFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 val) + { + rw_output[iPxPos] = val.rgb; + rw_inpainting_mask[iPxPos] = val.a; + } + +#elif defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT) + RWTexture2D rw_output : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT); + + FfxFloat32x4 RWLoadFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_output[iPxPos]; + } + + void StoreFrameinterpolationOutput(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 val) + { + rw_output[iPxPos] = val; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS + RWTexture2D rw_dilated_motion_vectors : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS); + + FfxFloat32x2 RWLoadDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_dilated_motion_vectors[iPxPos]; + } + + void StoreDilatedMotionVectors(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val) + { + rw_dilated_motion_vectors[iPxPos] = val; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH + RWTexture2D rw_dilated_depth : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH); + + FfxFloat32 RWLoadDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_dilated_depth[iPxPos]; + } + + void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 val) + { + rw_dilated_depth[iPxPos] = val; + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME + RWTexture2D rw_reconstructed_depth_previous_frame : 
FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME); + + FfxFloat32 RWLoadReconstructedDepthPreviousFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return ffxAsFloat(rw_reconstructed_depth_previous_frame[iPxPos]); + } + + void UpdateReconstructedDepthPreviousFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth) + { + FfxUInt32 uDepth = ffxAsUInt32(fDepth); + +#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH + InterlockedMax(rw_reconstructed_depth_previous_frame[iPxSample], uDepth); +#else + InterlockedMin(rw_reconstructed_depth_previous_frame[iPxSample], uDepth); // min for standard, max for inverted depth +#endif + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME + RWTexture2D rw_reconstructed_depth_interpolated_frame : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME); + + FfxFloat32 RWLoadReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return ffxAsFloat(rw_reconstructed_depth_interpolated_frame[iPxPos]); + } + + void StoreReconstructedDepthInterpolatedFrame(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 value) + { + FfxUInt32 uDepth = ffxAsUInt32(value); + rw_reconstructed_depth_interpolated_frame[iPxPos] = uDepth; + } + + void UpdateReconstructedDepthInterpolatedFrame(FfxInt32x2 iPxSample, FfxFloat32 fDepth) + { + FfxUInt32 uDepth = ffxAsUInt32(fDepth); + +#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH + InterlockedMax(rw_reconstructed_depth_interpolated_frame[iPxSample], uDepth); +#else + InterlockedMin(rw_reconstructed_depth_interpolated_frame[iPxSample], uDepth); // min for standard, max for inverted depth +#endif + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK + RWTexture2D rw_disocclusion_mask : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK); + + FfxFloat32x2 RWLoadDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_disocclusion_mask[iPxPos]; + 
} + + void StoreDisocclusionMask(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 val) + { + rw_disocclusion_mask[iPxPos] = val; + } +#endif + +#if defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y) + + RWTexture2D rw_game_motion_vector_field_x : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X); + RWTexture2D rw_game_motion_vector_field_y : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y); + + FfxUInt32 RWLoadGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_game_motion_vector_field_x[iPxPos]; + } + + void StoreGameMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + rw_game_motion_vector_field_x[iPxPos] = val; + } + + FfxUInt32 RWLoadGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_game_motion_vector_field_y[iPxPos]; + } + + void StoreGameMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + rw_game_motion_vector_field_y[iPxPos] = val; + } + + void UpdateGameMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector) + { + InterlockedMax(rw_game_motion_vector_field_x[iPxPos], packedVector.x); + InterlockedMax(rw_game_motion_vector_field_y[iPxPos], packedVector.y); + } + + FfxUInt32 UpdateGameMotionVectorFieldEx(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector) + { + FfxUInt32 uPreviousValueX = 0; + FfxUInt32 uPreviousValueY = 0; + InterlockedMax(rw_game_motion_vector_field_x[iPxPos], packedVector.x, uPreviousValueX); + InterlockedMax(rw_game_motion_vector_field_y[iPxPos], packedVector.y, uPreviousValueY); + + const FfxUInt32 uExistingVectorFieldEntry = ffxMax(uPreviousValueX, uPreviousValueY); + + return uExistingVectorFieldEntry; + } +#endif + +#if 
defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) + + RWTexture2D rw_optical_flow_motion_vector_field_x : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X); + RWTexture2D rw_optical_flow_motion_vector_field_y : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y); + + FfxUInt32 RWLoadOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_optical_flow_motion_vector_field_x[iPxPos]; + } + void StoreOpticalflowMotionVectorFieldX(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + rw_optical_flow_motion_vector_field_x[iPxPos] = val; + } + FfxUInt32 RWLoadOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos) + { + return rw_optical_flow_motion_vector_field_y[iPxPos]; + } + void StoreOpticalflowMotionVectorFieldY(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 val) + { + rw_optical_flow_motion_vector_field_y[iPxPos] = val; + } + void UpdateOpticalflowMotionVectorField(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32x2 packedVector) + { + InterlockedMax(rw_optical_flow_motion_vector_field_x[iPxPos], packedVector.x); + InterlockedMax(rw_optical_flow_motion_vector_field_y[iPxPos], packedVector.y); + } +#endif + +#ifdef FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS + globallycoherent RWStructuredBuffer rw_counters : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS); + + FfxUInt32 RWLoadCounter(FFX_PARAMETER_IN FfxInt32 iPxPos) + { + return rw_counters[iPxPos]; + } + + void StoreCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_IN FfxUInt32 counter) + { + rw_counters[iPxPos] = counter; + } + void AtomicIncreaseCounter(FFX_PARAMETER_IN FfxInt32 iPxPos, FFX_PARAMETER_OUT FfxUInt32 oldVal) + { + InterlockedAdd(rw_counters[iPxPos], 1, oldVal); + } +#endif + + +#if 
defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11) && \ + defined(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12) + + RWTexture2D rw_inpainting_pyramid0 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0); + RWTexture2D rw_inpainting_pyramid1 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1); + RWTexture2D rw_inpainting_pyramid2 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2); + RWTexture2D rw_inpainting_pyramid3 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3); + RWTexture2D rw_inpainting_pyramid4 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4); + globallycoherent RWTexture2D rw_inpainting_pyramid5 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5); + RWTexture2D rw_inpainting_pyramid6 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6); + RWTexture2D rw_inpainting_pyramid7 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7); + RWTexture2D rw_inpainting_pyramid8 : 
FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8); + RWTexture2D rw_inpainting_pyramid9 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9); + RWTexture2D rw_inpainting_pyramid10 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10); + RWTexture2D rw_inpainting_pyramid11 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11); + RWTexture2D rw_inpainting_pyramid12 : FFX_DECLARE_UAV(FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12); + + + FfxFloat32x4 RWLoadInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) + { + #define LOAD(idx) \ + if (index == idx) \ + { \ + return rw_inpainting_pyramid##idx[iPxPos]; \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + LOAD(6); + LOAD(7); + LOAD(8); + LOAD(9); + LOAD(10); + LOAD(11); + LOAD(12); + return 0; + + #undef LOAD + } + + void StoreInpaintingPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 outValue, FFX_PARAMETER_IN FfxUInt32 index) + { + #define STORE(idx) \ + if (index == idx) \ + { \ + rw_inpainting_pyramid##idx[iPxPos] = outValue; \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + STORE(6); + STORE(7); + STORE(8); + STORE(9); + STORE(10); + STORE(11); + STORE(12); + + #undef STORE + } +#endif + +#endif // #if defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_common.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_common.h new file mode 100644 index 000000000000..8206bf43bca1 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_common.h @@ -0,0 +1,445 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#if !defined(FFX_FRAMEINTERPOLATION_COMMON_H)
+#define FFX_FRAMEINTERPOLATION_COMMON_H
+
+// Bit flags; each occupies its own bit so they can be OR-ed together.
+#define FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES (1 << 0)
+#define FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS (1 << 1)
+#define FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_VIEW (1 << 2)
+
+FFX_STATIC const FfxFloat32 FFX_FRAMEINTERPOLATION_EPSILON = 1e-03f;
+FFX_STATIC const FfxFloat32 FFX_FRAMEINTERPOLATION_FLT_MAX = 3.402823466e+38f;
+FFX_STATIC const FfxFloat32 FFX_FRAMEINTERPOLATION_FLT_MIN = 1.175494351e-38f;
+
+FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = FFX_FRAMEINTERPOLATION_EPSILON;
+
+// Returns the weighted sum of a linear RGB color using the Rec. 709 luma
+// weights (0.2126, 0.7152, 0.0722).
+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
+{
+    return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f));
+}
+
+// Returns the weighted sum of a linear RGB color using the Rec. 2020 luminance
+// weights (0.2627, 0.678, 0.0593).
+FfxFloat32 LinearRec2020ToLuminance(FfxFloat32x3 linearRec2020RGB)
+{
+    FfxFloat32 fY = 0.2627 * linearRec2020RGB.x + 0.678 * linearRec2020RGB.y + 0.0593 * linearRec2020RGB.z;
+    return fY;
+}
+
+// Remaps an scRGB value so that minLuminance maps to 0 and maxLuminance maps to 1.
+// Both luminance bounds are divided by 80 before use (presumably scRGB's 80-nit
+// reference white - confirm against the caller's units).
+// NOTE: divides by (maxLuminance - minLuminance); the caller must keep them distinct.
+FfxFloat32x3 ffxscRGBToLinear(FfxFloat32x3 value, FfxFloat32 minLuminance, FfxFloat32 maxLuminance)
+{
+    FfxFloat32x3 p = value - ffxBroadcast3(minLuminance / 80.0f);
+    return p / ffxBroadcast3((maxLuminance - minLuminance) / 80.0f);
+}
+
+// Converts a raw back buffer color to linear RGB according to
+// BackBufferTransferFunction():
+//   0 - sRGB encoded       (ffxLinearFromSrgb)
+//   1 - PQ encoded         (ffxLinearFromPQ, rescaled by 10000 / MaxLuminance())
+//   2 - scRGB              (ffxscRGBToLinear with MinLuminance()/MaxLuminance())
+// Any other value yields black, matching the 0.0 fallback of RawRGBToLuminance().
+FfxFloat32x3 RawRGBToLinear(FfxFloat32x3 fRawRgb)
+{
+    // Initialized so that an unhandled transfer function id never returns an
+    // indeterminate value (the switch below has no default case).
+    FfxFloat32x3 fLinearRgb = FfxFloat32x3(0.0f, 0.0f, 0.0f);
+
+    switch (BackBufferTransferFunction())
+    {
+    case 0:
+        fLinearRgb = ffxLinearFromSrgb(fRawRgb);
+        break;
+    case 1:
+        fLinearRgb = ffxLinearFromPQ(fRawRgb) * (10000.0f / MaxLuminance());
+        break;
+    case 2:
+        fLinearRgb = ffxscRGBToLinear(fRawRgb, MinLuminance(), MaxLuminance());
+        break;
+    }
+
+    return fLinearRgb;
+}
+
+// Converts a raw back buffer color to a scalar luminance. The color is first
+// linearized with RawRGBToLinear(); the PQ path (1) then uses the Rec. 2020
+// weights, the sRGB (0) and scRGB (2) paths use the Rec. 709 weights.
+// Returns 0.0 for an unhandled transfer function id.
+FfxFloat32 RawRGBToLuminance(FfxFloat32x3 fRawRgb)
+{
+    FfxFloat32 fLuminance = 0.0f;
+
+    switch (BackBufferTransferFunction())
+    {
+    case 0:
+        fLuminance = RGBToLuma(RawRGBToLinear(fRawRgb));
+        break;
+    case 1:
+        fLuminance = LinearRec2020ToLuminance(RawRGBToLinear(fRawRgb));
+        break;
+    case 2:
+        fLuminance = RGBToLuma(RawRGBToLinear(fRawRgb));
+        break;
+    }
+
+    return fLuminance;
+}
+
+// Converts a raw back buffer color to a perceptual lightness value.
+// Applies the CIE L* formula to the luminance (linear segment below
+// 216/24389, cube-root segment above) and scales the result by 0.01.
+FfxFloat32 RawRGBToPerceivedLuma(FfxFloat32x3 fRawRgb)
+{
+    FfxFloat32 fLuminance = RawRGBToLuminance(fRawRgb);
+
+    FfxFloat32 fPerceivedLuminance = 0;
+    if (fLuminance <= 216.0f / 24389.0f)
+    {
+        fPerceivedLuminance = fLuminance * (24389.0f / 27.0f);
+    }
+    else
+    {
+        fPerceivedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f;
+    }
+
+    return fPerceivedLuminance * 0.01f;
+}
+
+// Describes the four taps of a manual bilinear fetch: the integer position of
+// the top-left tap, the per-tap integer offsets from it and the per-tap weights.
+struct BilinearSamplingData
+{
+    FfxInt32x2 iOffsets[4];
+    FfxFloat32 fWeights[4];
+    FfxInt32x2 iBasePos;
+};
+
+// Builds the bilinear tap description for UV coordinate fUv in a texture of
+// iSize texels. The sample position is shifted by half a texel so that the
+// weights are relative to texel centers. Taps are ordered (0,0), (1,0), (0,1),
+// (1,1) and their weights sum to 1. Taps may lie outside the texture; callers
+// in this file filter them with IsOnScreen().
+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
+{
+    BilinearSamplingData data;
+
+    FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
+    data.iBasePos = FfxInt32x2(floor(fPxSample));
+    FfxFloat32x2 fPxFrac = ffxFract(fPxSample);
+
+    data.iOffsets[0] = FfxInt32x2(0, 0);
+    data.iOffsets[1] = FfxInt32x2(1, 0);
+    data.iOffsets[2] = FfxInt32x2(0, 1);
+    data.iOffsets[3] = FfxInt32x2(1, 1);
+
+    data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y);
+    data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y);
+    data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y);
+    data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y);
+
+    return data;
+}
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+// Converts a device depth value to view space depth as
+// factors[1] / (fDeviceDepth - factors[0]), with the factors taken from
+// DeviceToViewSpaceTransformFactors().
+FfxFloat32 ConvertFromDeviceDepthToViewSpace(FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 deviceToViewDepth = DeviceToViewSpaceTransformFactors();
+    return deviceToViewDepth[1] / (fDeviceDepth - deviceToViewDepth[0]);
+}
+
+// Maps a pixel position inside a viewport of iSize pixels to [-1, 1] on both
+// axes, with the Y axis flipped (pixel row 0 maps to +1).
+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
+{
+    return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
+}
+
+// Reconstructs a view space position from a viewport pixel and its device depth.
+// Z is the view space depth; X and Y are the NDC coordinates scaled by Z and by
+// components [2] and [3] of DeviceToViewSpaceTransformFactors().
+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();
+
+    const FfxFloat32 Z = ConvertFromDeviceDepthToViewSpace(fDeviceDepth);
+
+    const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize);
+    const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z;
+    const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z;
+
+    return FfxFloat32x3(X, Y, Z);
+}
+#endif
+
+// Returns true when pos lies in [0, size) on both axes. Both operands are
+// converted to unsigned first, so a negative coordinate wraps to a large value
+// and fails the same comparison (no separate ">= 0" test is needed).
+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
+{
+    return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size)));
+}
+
+// Returns true when fUv lies strictly inside the unit square; the borders
+// 0.0 and 1.0 themselves count as outside.
+FfxBoolean IsUvInside(FfxFloat32x2 fUv)
+{
+    return (fUv.x > 0.0f && fUv.x < 1.0f) && (fUv.y > 0.0f && fUv.y < 1.0f);
+}
+
+// Returns true when pos lies inside the rectangle that starts at iRectCorner
+// (inclusive) and extends iRectSize pixels (far edge exclusive).
+FfxBoolean IsInRect(FfxInt32x2 pos, FfxInt32x2 iRectCorner, FfxInt32x2 iRectSize)
+{
+    return (pos.x >= iRectCorner.x && pos.x < (iRectSize.x + iRectCorner.x) && pos.y >= iRectCorner.y && pos.y < (iRectSize.y + iRectCorner.y));
+}
+
+// Returns min(v0, v1) / max(v0, v1), or 0 when the larger value is 0.
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
+{
+    const FfxFloat32 m = ffxMax(v0, v1);
+    return m != 0 ? ffxMin(v0, v1) / m : 0;
+}
+
+// Dot product of v0 and v1 after dividing both by the length of the longer
+// vector. Returns 1 when both vectors have zero length.
+FfxFloat32 NormalizedDot3(const FfxFloat32x3 v0, const FfxFloat32x3 v1)
+{
+    FfxFloat32 fMaxLength = ffxMax(length(v0), length(v1));
+
+    return fMaxLength > 0.0f ? dot(v0 / fMaxLength, v1 / fMaxLength) : 1.0f;
+}
+
+// Two-component variant of NormalizedDot3().
+FfxFloat32 NormalizedDot2(const FfxFloat32x2 v0, const FfxFloat32x2 v1)
+{
+    FfxFloat32 fMaxLength = ffxMax(length(v0), length(v1));
+
+    return fMaxLength > 0.0f ? dot(v0 / fMaxLength, v1 / fMaxLength) : 1.0f;
+}
+
+// Compares two colors channel by channel and returns the largest per-channel
+// difference factor in [0, 1]. A channel contributes 0 when both values are
+// equal and reaches 1 once the smaller value is at most 90% of the larger one
+// ((1 - min/max) / 0.1, saturated).
+FfxFloat32 CalculateStaticContentFactor(FfxFloat32x3 fCurrentInterpolationSource, FfxFloat32x3 fPresentColor)
+{
+    const FfxFloat32x3 fFactor = ffxSaturate(FfxFloat32x3(
+        ffxSaturate((1.0f - MinDividedByMax(fCurrentInterpolationSource.r, fPresentColor.r)) / 0.1f),
+        ffxSaturate((1.0f - MinDividedByMax(fCurrentInterpolationSource.g, fPresentColor.g)) / 0.1f),
+        ffxSaturate((1.0f - MinDividedByMax(fCurrentInterpolationSource.b, fPresentColor.b)) / 0.1f)
+    ));
+
+    return max(fFactor.x, max(fFactor.y, fFactor.z));
+}
+
+//
+// MOTION VECTOR FIELD
+//
+
+// Layout of one packed 32 bit vector field entry, from the lowest bit up:
+//   bits  0..15  one motion vector component (16 bit float)
+//   bits 16..20  low priority factor
+//   bits 21..30  high priority factor
+//   bit  31      primary vector indication
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_ENTRY_BIT_COUNT = 32;
+
+// Make sure all bit counts add up to MOTION_VECTOR_FIELD_ENTRY_BIT_COUNT
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_VECTOR_COEFFICIENT_BIT_COUNT = 16;
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_PRIORITY_LOW_BIT_COUNT = 5;
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_PRIORITY_HIGH_BIT_COUNT = 10;
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_PRIMARY_VECTOR_INDICATION_BIT_COUNT = 1;
+
+FFX_STATIC const FfxUInt32 MOTION_VECTOR_FIELD_PRIMARY_VECTOR_INDICATION_BIT = (1U << (MOTION_VECTOR_FIELD_ENTRY_BIT_COUNT - 1));
+
+FFX_STATIC const FfxUInt32 PRIORITY_LOW_MAX = (1U << MOTION_VECTOR_FIELD_PRIORITY_LOW_BIT_COUNT) - 1;
+FFX_STATIC const FfxUInt32 PRIORITY_HIGH_MAX = (1U << MOTION_VECTOR_FIELD_PRIORITY_HIGH_BIT_COUNT) - 1;
+
+FFX_STATIC const FfxUInt32 PRIORITY_LOW_OFFSET = MOTION_VECTOR_FIELD_VECTOR_COEFFICIENT_BIT_COUNT;
+FFX_STATIC const FfxUInt32 PRIORITY_HIGH_OFFSET = PRIORITY_LOW_OFFSET + MOTION_VECTOR_FIELD_PRIORITY_LOW_BIT_COUNT;
+FFX_STATIC const FfxUInt32 PRIMARY_VECTOR_INDICATION_OFFSET = PRIORITY_HIGH_OFFSET + MOTION_VECTOR_FIELD_PRIORITY_HIGH_BIT_COUNT;
+
+// Unpacked form of a vector field entry plus values derived from it by the
+// loaders below (fVelocity, bNegOutside, bPosOutside, bInPainted).
+struct VectorFieldEntry
+{
+    FfxFloat32x2 fMotionVector;
+    FfxFloat32 uHighPriorityFactor;
+    FfxFloat32 uLowPriorityFactor;
+    FfxBoolean bValid;
+    FfxBoolean bPrimary;
+    FfxBoolean bSecondary;
+    FfxBoolean bInPainted;
+    FfxFloat32 fVelocity;
+    FfxBoolean bNegOutside;
+    FfxBoolean bPosOutside;
+};
+
+// Returns an entry with every field zeroed / false.
+VectorFieldEntry NewVectorFieldEntry()
+{
+    VectorFieldEntry vfe;
+    vfe.fMotionVector = FfxFloat32x2(0.0, 0.0);
+    vfe.uHighPriorityFactor = 0.0;
+    vfe.uLowPriorityFactor = 0.0;
+    vfe.bValid = false;
+    vfe.bPrimary = false;
+    vfe.bSecondary = false;
+    vfe.bInPainted = false;
+    vfe.fVelocity = 0.0;
+    vfe.bNegOutside = false;
+    vfe.bPosOutside = false;
+    return vfe;
+}
+
+// Returns true when the primary vector indication bit (bit 31) is set.
+FfxBoolean PackedVectorFieldEntryIsPrimary(FfxUInt32 packedEntry)
+{
+    return ((packedEntry & MOTION_VECTOR_FIELD_PRIMARY_VECTOR_INDICATION_BIT) != 0);
+}
+
+// Packs a motion vector into two 32 bit words (x component in .x, y component
+// in .y). Both words carry the same upper 16 bits: primary flag and the two
+// priority factors, each masked to its bit budget. The low 16 bits come from
+// ffxF32ToF16() (assumed to return the half-float bits in the low 16 bits,
+// since the result is OR-ed in unmasked - confirm against its definition).
+FfxUInt32x2 PackVectorFieldEntries(FfxBoolean bIsPrimary, FfxUInt32 uHighPriorityFactor, FfxUInt32 uLowPriorityFactor, FfxFloat32x2 fMotionVector)
+{
+    const FfxUInt32 uPriority =
+        (FfxUInt32(bIsPrimary) * MOTION_VECTOR_FIELD_PRIMARY_VECTOR_INDICATION_BIT)
+        | ((uHighPriorityFactor & PRIORITY_HIGH_MAX) << PRIORITY_HIGH_OFFSET)
+        | ((uLowPriorityFactor & PRIORITY_LOW_MAX) << PRIORITY_LOW_OFFSET);
+
+    FfxUInt32 packedX = uPriority | ffxF32ToF16(fMotionVector.x);
+    FfxUInt32 packedY = uPriority | ffxF32ToF16(fMotionVector.y);
+
+    return FfxUInt32x2(packedX, packedY);
+}
+
+// Unpacks an entry produced by PackVectorFieldEntries().
+// The priority factors and the primary flag are read from packed.x only and
+// the factors are normalized to [0, 1]. An entry is valid when its high
+// priority factor is non-zero; a valid non-primary entry is "secondary" and
+// gets its high priority factor reversed (1 - factor).
+// fVelocity, bNegOutside and bPosOutside are not derived here; they are left
+// at the NewVectorFieldEntry() defaults for the caller to fill in.
+void UnpackVectorFieldEntries(FfxUInt32x2 packed, out VectorFieldEntry vfElement)
+{
+    // An out parameter has no defined value on entry, so start from a fully
+    // initialized entry instead of leaving some fields unwritten.
+    vfElement = NewVectorFieldEntry();
+
+    vfElement.uHighPriorityFactor = FfxFloat32((packed.x >> PRIORITY_HIGH_OFFSET) & PRIORITY_HIGH_MAX) / PRIORITY_HIGH_MAX;
+    vfElement.uLowPriorityFactor = FfxFloat32((packed.x >> PRIORITY_LOW_OFFSET) & PRIORITY_LOW_MAX) / PRIORITY_LOW_MAX;
+
+    vfElement.bPrimary = PackedVectorFieldEntryIsPrimary(packed.x);
+    vfElement.bValid = (vfElement.uHighPriorityFactor > 0.0f);
+    vfElement.bSecondary = vfElement.bValid && !vfElement.bPrimary;
+
+    // Reverse priority factor for secondary vectors
+    if (vfElement.bSecondary)
+    {
+        vfElement.uHighPriorityFactor = 1.0f - vfElement.uHighPriorityFactor;
+    }
+
+    vfElement.fMotionVector.x = ffxUnpackF32(packed.x).x;
+    vfElement.fMotionVector.y = ffxUnpackF32(packed.y).x;
+    vfElement.bInPainted = false;
+}
+
+//
+// MOTION VECTOR FIELD INPAINTING AND SAMPLING
+//
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID)
+// Fetches one mip level of the inpainting pyramid at fUv with a manual,
+// priority weighted bilinear filter. Each on-screen tap is weighted by its
+// bilinear weight times its .z channel; taps with .z <= 0 contribute nothing.
+// Returns the weighted average, or all zeros when no tap contributed.
+FfxFloat32x4 ComputeMvInpaintingLevel(FfxFloat32x2 fUv, const FfxInt32 iMipLevel, const FfxInt32x2 iTexSize)
+{
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv, iTexSize);
+
+    FfxFloat32 fSum = 0.0f;
+    FfxFloat32x4 fColor = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+    {
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, iTexSize))
+        {
+            FfxFloat32x4 fSample = LoadInpaintingPyramid(iMipLevel, iSamplePos);
+
+            const FfxFloat32 fPriorityFactor = fSample.z;
+            const FfxFloat32 fValidMvFactor = FfxFloat32(fSample.z > 0);
+            const FfxFloat32 fSampleWeight = bilinearInfo.fWeights[iSampleIndex] * fValidMvFactor * fPriorityFactor;
+
+            fSum += fSampleWeight;
+            fColor += fSample * fSampleWeight;
+        }
+    }
+
+    // Guard the normalization against an empty neighborhood.
+    fColor /= (fSum > 0.0f) ? fSum : 1.0f;
+
+    return fColor;
+}
+#if defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y)
+
+// Loads the game motion vector field entry at fUv. When the stored entry is
+// not valid, a replacement is fetched from the inpainting pyramid: mip levels
+// 0..10 are tried in order until one returns a non-zero .w, and the entry is
+// flagged bInPainted. fVelocity, bNegOutside and bPosOutside are always
+// derived from the final motion vector.
+void LoadInpaintedGameFieldMv(FfxFloat32x2 fUv, out VectorFieldEntry vfElement)
+{
+    FfxInt32x2 iPxSample = FfxInt32x2(fUv * RenderSize());
+    FfxUInt32x2 packedGameFieldMv = LoadGameFieldMv(iPxSample);
+    UnpackVectorFieldEntries(packedGameFieldMv, vfElement);
+
+    if (!vfElement.bValid)
+    {
+        //FfxFloat32x2 fUv = (FfxFloat32x2(iPxSample) + 0.5f) / RenderSize();
+        FfxInt32x2 iTexSize = RenderSize();
+
+        FfxFloat32x4 fInPaintedVector = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+        for (FfxInt32 iMipLevel = 0; iMipLevel < 11 && (fInPaintedVector.w == 0.0f); iMipLevel++)
+        {
+            // The size is halved before the first fetch, so pyramid mip 0 is
+            // sampled at half the render size.
+            iTexSize /= 2;
+
+            fInPaintedVector = ComputeMvInpaintingLevel(fUv, iMipLevel, iTexSize);
+        }
+
+        vfElement.fMotionVector = fInPaintedVector.xy;
+        vfElement.uHighPriorityFactor = fInPaintedVector.z;
+        vfElement.uLowPriorityFactor = fInPaintedVector.w;
+        vfElement.bInPainted = true;
+    }
+
+    vfElement.bNegOutside = !IsUvInside(fUv - vfElement.fMotionVector);
+    vfElement.bPosOutside = !IsUvInside(fUv + vfElement.fMotionVector);
+    vfElement.fVelocity = length(vfElement.fMotionVector);
+}
+#endif
+#endif
+
+#if defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y) && \
+    defined(FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION)
+// Bilinearly samples the optical flow motion vector field at fUv.
+// The motion vector and both priority factors are averaged over the on-screen
+// taps (renormalized by the sum of their weights). bValid, bPrimary,
+// bSecondary and bInPainted stay at their NewVectorFieldEntry() defaults;
+// fVelocity, bNegOutside and bPosOutside are derived from the averaged vector.
+void SampleOpticalFlowMotionVectorField(FfxFloat32x2 fUv, out VectorFieldEntry vfElement)
+{
+    const FfxFloat32 scaleFactor = 1.0f;
+
+    // Loop invariant: size of the optical flow field being sampled.
+    const FfxInt32x2 iFieldSize = FfxInt32x2(GetOpticalFlowSize2() * scaleFactor);
+
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv, iFieldSize);
+
+    vfElement = NewVectorFieldEntry();
+
+    FfxFloat32 fWeightSum = 0.0f;
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+    {
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, iFieldSize))
+        {
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+
+            // Keep the packed entry unsigned: it is a bit field, not a number.
+            VectorFieldEntry fOfVectorSample = NewVectorFieldEntry();
+            const FfxUInt32x2 packedOpticalFlowMv = LoadOpticalFlowFieldMv(iSamplePos);
+            UnpackVectorFieldEntries(packedOpticalFlowMv, fOfVectorSample);
+
+            vfElement.fMotionVector += fOfVectorSample.fMotionVector * fWeight;
+            vfElement.uHighPriorityFactor += fOfVectorSample.uHighPriorityFactor * fWeight;
+            vfElement.uLowPriorityFactor += fOfVectorSample.uLowPriorityFactor * fWeight;
+
+            fWeightSum += fWeight;
+        }
+    }
+
+    if (fWeightSum > 0.0f)
+    {
+        vfElement.fMotionVector /= fWeightSum;
+        vfElement.uHighPriorityFactor /= fWeightSum;
+        vfElement.uLowPriorityFactor /= fWeightSum;
+    }
+
+    vfElement.bNegOutside = !IsUvInside(fUv - vfElement.fMotionVector);
+    vfElement.bPosOutside = !IsUvInside(fUv + vfElement.fMotionVector);
+    vfElement.fVelocity = length(vfElement.fMotionVector);
+}
+#endif
+
+// Compresses a color by dividing it by (1 + its largest channel); the largest
+// channel is clamped to be at least 0 before use.
+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
+{
+    return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx;
+}
+
+// Inverse of Tonemap(): divides the color by (1 - its largest channel), with
+// the divisor clamped to at least FFX_TONEMAP_EPSILON.
+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
+{
+    return fRgb / ffxMax(FFX_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
+}
+
+// Maps a render resolution pixel to the display resolution pixel that contains
+// its jitter-corrected center: (pos + 0.5 - Jitter()) / RenderSize() * DisplaySize(),
+// rounded down.
+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
+{
+    FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
+    FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize();
+    FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr));
+    return iPxHrPos;
+}
+#if FFX_HALF
+// Reduced precision overload of ComputeHrPosFromLrPos().
+FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
+{
+    FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
+    FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
+    FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr));
+    return iPxHrPos;
+}
+#endif
+
+#endif //!defined(FFX_FRAMEINTERPOLATION_COMMON_H)
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h
new file mode 100644
index 000000000000..3c6132b65d61
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h
@@ -0,0 +1,121 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_COMPUTE_GAME_VECTOR_FIELD_INPAINTING_PYRAMID_H
+#define FFX_FRAMEINTERPOLATION_COMPUTE_GAME_VECTOR_FIELD_INPAINTING_PYRAMID_H
+
+#include "ffx_frameinterpolation_common.h"
+//--------------------------------------------------------------------------------------
+// Buffer definitions - global atomic counter
+//--------------------------------------------------------------------------------------
+
+// Group-shared state used by the Spd* callbacks below: one counter plus a
+// 16x16 scratch tile per colour channel.
+FFX_GROUPSHARED FfxUInt32 spdCounter;
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
+
+// Source fetch for the downsample: unpacks the game motion-vector field entry
+// at `tex` into (motion.xy, high priority, low priority). `slice` is unused.
+// The result is multiplied by 1 when DisplaySize().x > 0 and by 0 otherwise.
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 tex, FfxUInt32 slice)
+{
+    VectorFieldEntry gameMv;
+    FfxUInt32x2 packedGameFieldMv = LoadGameFieldMv(tex);
+    UnpackVectorFieldEntries(packedGameFieldMv, gameMv);
+
+    return FfxFloat32x4(gameMv.fMotionVector, gameMv.uHighPriorityFactor, gameMv.uLowPriorityFactor) * FfxFloat32(DisplaySize().x > 0);
+}
+
+// Reads back the inpainting pyramid at `tex` with the fixed index 5.
+// `slice` is unused.
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
+{
+    return RWLoadInpaintingPyramid(tex, 5);
+}
+
+// Writes `outValue` to the inpainting pyramid at `pix` for the given `index`.
+// `slice` is unused.
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
+{
+    StoreInpaintingPyramid(pix, outValue, index);
+}
+
+// Bumps the COUNTER_SPD counter, passing the group-shared spdCounter to
+// AtomicIncreaseCounter (presumably to receive the previous value - confirm
+// against that helper). `slice` is unused.
+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
+{
+    AtomicIncreaseCounter(COUNTER_SPD, spdCounter);
+}
+
+// Returns the group-shared copy of the counter.
+FfxUInt32 SpdGetAtomicCounter()
+{
+    return spdCounter;
+}
+// Stores 0 to the COUNTER_SPD counter. `slice` is unused.
+void SpdResetAtomicCounter(FfxUInt32 slice)
+{
+    StoreCounter(COUNTER_SPD, 0);
+}
+
+// Gathers one RGBA value from the four per-channel scratch tiles.
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{
+    return FfxFloat32x4(
+        spdIntermediateR[x][y],
+        spdIntermediateG[x][y],
+        spdIntermediateB[x][y],
+        spdIntermediateA[x][y]);
+}
+
+// Scatters one RGBA value into the four per-channel scratch tiles.
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{
+    spdIntermediateR[x][y] = value.x;
+    spdIntermediateG[x][y] = value.y;
+    spdIntermediateB[x][y] = value.z;
+    spdIntermediateA[x][y] = value.w;
+}
+
+// 2x2 reduction: averages only the samples whose z component is greater than
+// zero. When no sample qualifies the divisor falls back to 1, so the result
+// is the zero vector.
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{
+    FfxFloat32x4 vec = FfxFloat32x4(0,0,0,0);
+
+    FfxFloat32 fWeightSum = 0.0f;
+#define ADD(SAMPLE) { \
+        FfxFloat32 fWeight = FfxFloat32(SAMPLE.z > 0.0f); \
+        vec += SAMPLE * fWeight; \
+        fWeightSum += fWeight; \
+        }
+
+    ADD(v0);
+    ADD(v1);
+    ADD(v2);
+    ADD(v3);
+
+// ADD is a local helper only; undefine it so the generic name does not leak
+// into ../spd/ffx_spd.h (included just below) or any later include.
+#undef ADD
+
+    vec /= (fWeightSum > FFX_FRAMEINTERPOLATION_EPSILON) ? fWeightSum : 1.0f;
+
+    return vec;
+}
+
+// Included after the Spd* callbacks above are defined.
+#include "../spd/ffx_spd.h"
+
+// Pass entry point: forwards the group id / local index together with
+// NumMips(), NumWorkGroups() and WorkGroupOffset() to SpdDownsample.
+void computeFrameinterpolationGameVectorFieldInpaintingPyramid(FfxInt32x3 iGroupId, FfxInt32 iLocalIndex)
+{
+    SpdDownsample(
+        FfxUInt32x2(iGroupId.xy),
+        FfxUInt32(iLocalIndex),
+        FfxUInt32(NumMips()),
+        FfxUInt32(NumWorkGroups()),
+        FfxUInt32(iGroupId.z),
+        FfxUInt32x2(WorkGroupOffset()));
+}
+
+#endif // FFX_FRAMEINTERPOLATION_COMPUTE_GAME_VECTOR_FIELD_INPAINTING_PYRAMID_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h
new file mode 100644
index 000000000000..c9b3d7a73915
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h
@@ -0,0 +1,120 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_COMPUTE_INPAINTING_PYRAMID_H
+#define FFX_FRAMEINTERPOLATION_COMPUTE_INPAINTING_PYRAMID_H
+
+//--------------------------------------------------------------------------------------
+// Buffer definitions - global atomic counter
+//--------------------------------------------------------------------------------------
+
+// Group-shared state used by the Spd* callbacks below: one counter plus a
+// 16x16 scratch tile per colour channel.
+FFX_GROUPSHARED FfxUInt32 spdCounter;
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
+
+// Source fetch for the downsample: loads the frame-interpolation output at
+// `tex` (zeroed when DisplaySize().x is not positive), turns its w channel
+// into saturate(1 - w), and forces w to 0 for texels outside the
+// interpolation rectangle. `slice` is unused.
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 tex, FfxUInt32 slice)
+{
+    FfxFloat32x4 fColor = LoadFrameInterpolationOutput(tex) * FfxFloat32(DisplaySize().x > 0);
+
+    // reverse sample weights
+    fColor.w = ffxSaturate(1.0f - fColor.w);
+
+
+    if (tex.x < InterpolationRectBase().x || tex.x >= (InterpolationRectSize().x + InterpolationRectBase().x) || tex.y < InterpolationRectBase().y ||
+        tex.y >= (InterpolationRectSize().y + InterpolationRectBase().y))
+    {
+        fColor.w = 0.0f; // don't take contributions from outside of the interpolation rect
+    }
+
+    return fColor;
+}
+
+// Reads back the inpainting pyramid at `tex` with the fixed index 5.
+// `slice` is unused.
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
+{
+    return RWLoadInpaintingPyramid(tex, 5);
+}
+
+// Writes `outValue` to the inpainting pyramid at `pix` for the given `index`.
+// `slice` is unused.
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
+{
+    StoreInpaintingPyramid(pix, outValue, index);
+}
+
+// Bumps the COUNTER_SPD counter, passing the group-shared spdCounter to
+// AtomicIncreaseCounter (presumably to receive the previous value - confirm
+// against that helper). `slice` is unused.
+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
+{
+    AtomicIncreaseCounter(COUNTER_SPD, spdCounter);
+}
+
+// Returns the group-shared copy of the counter.
+FfxUInt32 SpdGetAtomicCounter()
+{
+    return spdCounter;
+}
+// Stores 0 to the COUNTER_SPD counter. `slice` is unused.
+void SpdResetAtomicCounter(FfxUInt32 slice)
+{
+    StoreCounter(COUNTER_SPD, 0);
+}
+
+// Gathers one RGBA value from the four per-channel scratch tiles.
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{
+    return FfxFloat32x4(
+        spdIntermediateR[x][y],
+        spdIntermediateG[x][y],
+        spdIntermediateB[x][y],
+        spdIntermediateA[x][y]);
+}
+
+// Scatters one RGBA value into the four per-channel scratch tiles.
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{
+    spdIntermediateR[x][y] = value.x;
+    spdIntermediateG[x][y] = value.y;
+    spdIntermediateB[x][y] = value.z;
+    spdIntermediateA[x][y] = value.w;
+}
+
+// 2x2 reduction: average of the four samples weighted by their own w channel.
+// All four channels (w included) are weighted; returns zero when the weights
+// sum to exactly zero.
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{
+    FfxFloat32x4 w = FfxFloat32x4(v0.w, v1.w, v2.w, v3.w);
+
+    FfxFloat32 sum = (w[0] + w[1] + w[2] + w[3]);
+
+    if (sum == 0.0f) {
+        return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+    }
+
+    return (v0 * w[0] + v1 * w[1] + v2 * w[2] + v3 * w[3]) / sum;
+}
+
+// Included after the Spd* callbacks above are defined.
+#include "../spd/ffx_spd.h"
+
+// Pass entry point: forwards the group id / local index together with
+// NumMips(), NumWorkGroups() and WorkGroupOffset() to SpdDownsample.
+void computeFrameinterpolationInpaintingPyramid(FfxInt32x3 iGroupId, FfxInt32 iLocalIndex)
+{
+    SpdDownsample(
+        FfxUInt32x2(iGroupId.xy),
+        FfxUInt32(iLocalIndex),
+        FfxUInt32(NumMips()),
+        FfxUInt32(NumWorkGroups()),
+        FfxUInt32(iGroupId.z),
+        FfxUInt32x2(WorkGroupOffset()));
+}
+
+#endif // FFX_FRAMEINTERPOLATION_COMPUTE_INPAINTING_PYRAMID_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h
new file mode 100644
index 000000000000..bcd3a52a7941
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h
@@ -0,0 +1,172 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_FRAMEINTERPOLATION_DEBUG_VIEW_H
+#define FFX_FRAMEINTERPOLATION_DEBUG_VIEW_H
+
+// Pixel rectangle of one debug tile: top-left `offset` and `size`, in pixels.
+struct FfxFrameInterpolationDebugViewport
+{
+    FfxInt32x2 offset;
+    FfxInt32x2 size;
+};
+
+// Macro to cull and draw debug viewport
+// Calls function(pos, vp) only when pos lies inside vp.
+#define DRAW_VIEWPORT(function, pos, vp)    \
+    {                                       \
+        if (pointIsInsideViewport(pos, vp)) \
+        {                                   \
+            function(pos, vp);              \
+        }                                   \
+    }
+
+// Converts a pixel position into a UV relative to the viewport, using the
+// pixel centre (+0.5) and dividing by the viewport size.
+FfxFloat32x2 getTransformedUv(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = (FfxFloat32x2(iPxPos - vp.offset) + 0.5f) / vp.size;
+
+    return fUv;
+}
+
+// Encodes a motion vector as a colour: xy = 0.5 + mv * DisplaySize() * 0.1,
+// z = 0.5, w = 1.
+FfxFloat32x4 getMotionVectorColor(FfxFloat32x2 fMotionVector)
+{
+    return FfxFloat32x4(0.5f + fMotionVector * DisplaySize() * 0.1f, 0.5f, 1.0f);
+}
+
+// Returns red for pixels on the viewport diagonal (x == y * aspect ratio) and
+// black elsewhere. Not called by any function in this header.
+FfxFloat32x4 getUnusedIndicationColor(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxInt32x2 basePos = iPxPos - vp.offset;
+
+    FfxFloat32 ar = FfxFloat32(vp.size.x) / FfxFloat32(vp.size.y);
+
+    return FfxFloat32x4(basePos.x == FfxInt32(basePos.y * ar), 0, 0, 1);
+}
+
+// Tile: inpainted game motion-vector field, shown as a motion-vector colour.
+void drawGameMotionVectorFieldVectors(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    VectorFieldEntry gameMv;
+    LoadInpaintedGameFieldMv(fUv, gameMv);
+
+    StoreFrameinterpolationOutput(iPxPos, getMotionVectorColor(gameMv.fMotionVector));
+}
+
+// Tile: high-priority factor of the inpainted game field, in the green channel.
+void drawGameMotionVectorFieldDepthPriority(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    VectorFieldEntry gameMv;
+    LoadInpaintedGameFieldMv(fUv, gameMv);
+
+    StoreFrameinterpolationOutput(iPxPos, FfxFloat32x4(0, gameMv.uHighPriorityFactor, 0, 1));
+}
+
+// Tile: optical-flow motion-vector field, shown as a motion-vector colour.
+void drawOpticalFlowMotionVectorField(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    VectorFieldEntry ofMv;
+    SampleOpticalFlowMotionVectorField(fUv, ofMv);
+
+    StoreFrameinterpolationOutput(iPxPos, getMotionVectorColor(ofMv.fMotionVector));
+}
+
+// Tile: disocclusion mask (xy, saturated) in the red/green channels. The UV
+// is rescaled by RenderSize() / GetMaxRenderSize() before sampling.
+void drawDisocclusionMask(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    FfxFloat32x2 fLrUv = fUv * (FfxFloat32x2(RenderSize()) / GetMaxRenderSize());
+
+    FfxFloat32x2 fDisocclusionFactor = ffxSaturate(SampleDisocclusionMask(fLrUv).xy);
+
+    StoreFrameinterpolationOutput(iPxPos, FfxFloat32x4(fDisocclusionFactor, 0, 1));
+}
+
+// Returns |distortion field| * 10 in the red/green channels, alpha 1.
+FfxFloat32x4 getDistortionField(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    FfxFloat32x2 fDistortionFieldUv = abs(SampleDistortionField(fUv).xy);
+
+    return FfxFloat32x4(fDistortionFieldUv * 10.0f, 0.0f, 1.0f);
+}
+
+// Tile: the present backbuffer when GetHUDLessAttachedFactor() == 1;
+// otherwise the distortion-field visualisation is shown instead.
+void drawPresentBackbuffer(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    FfxFloat32x4 fPresentColor = getDistortionField(iPxPos, vp);
+
+    if (GetHUDLessAttachedFactor() == 1)
+    {
+        fPresentColor = SamplePresentBackbuffer(fUv);
+    }
+
+    StoreFrameinterpolationOutput(iPxPos, fPresentColor);
+}
+
+// Tile: the current backbuffer (interpolation source), alpha forced to 1.
+void drawCurrentInterpolationSource(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxFloat32x2 fUv = getTransformedUv(iPxPos, vp);
+
+    FfxFloat32x4 fCurrentBackBuffer = FfxFloat32x4(SampleCurrentBackbuffer(fUv), 1.0f);
+
+    StoreFrameinterpolationOutput(iPxPos, fCurrentBackBuffer);
+}
+
+// True when iPxPos lies in [offset, offset + size) on both axes.
+FfxBoolean pointIsInsideViewport(FfxInt32x2 iPxPos, FfxFrameInterpolationDebugViewport vp)
+{
+    FfxInt32x2 extent = vp.offset + vp.size;
+
+    return (iPxPos.x >= vp.offset.x && iPxPos.x < extent.x) && (iPxPos.y >= vp.offset.y && iPxPos.y < extent.y);
+}
+
+// Debug-view entry point for one output pixel: splits the display into a 3x3
+// grid of equally sized tiles and draws whichever tile contains iPxPos.
+// Only the top and bottom rows are populated; the middle row draws nothing.
+void computeDebugView(FfxInt32x2 iPxPos)
+{
+#define VIEWPORT_GRID_SIZE_X 3
+#define VIEWPORT_GRID_SIZE_Y 3
+
+    FfxFloat32x2 fViewportScale = FfxFloat32x2(1.0f / VIEWPORT_GRID_SIZE_X, 1.0f / VIEWPORT_GRID_SIZE_Y);
+    FfxInt32x2 iViewportSize = FfxInt32x2(DisplaySize() * fViewportScale);
+
+    // compute grid [y][x] for easier placement of viewports
+    FfxFrameInterpolationDebugViewport vp[VIEWPORT_GRID_SIZE_Y][VIEWPORT_GRID_SIZE_X];
+    for (FfxInt32 y = 0; y < VIEWPORT_GRID_SIZE_Y; y++)
+    {
+        for (FfxInt32 x = 0; x < VIEWPORT_GRID_SIZE_X; x++)
+        {
+            vp[y][x].offset = iViewportSize * FfxInt32x2(x, y);
+            vp[y][x].size = iViewportSize;
+        }
+    }
+
+    // top row
+    DRAW_VIEWPORT(drawGameMotionVectorFieldVectors, iPxPos, vp[0][0]);
+    DRAW_VIEWPORT(drawGameMotionVectorFieldDepthPriority, iPxPos, vp[0][1]);
+    DRAW_VIEWPORT(drawOpticalFlowMotionVectorField, iPxPos, vp[0][2]);
+
+    // bottom row
+    DRAW_VIEWPORT(drawDisocclusionMask, iPxPos, vp[2][0]);
+    DRAW_VIEWPORT(drawCurrentInterpolationSource, iPxPos, vp[2][1]);
+    DRAW_VIEWPORT(drawPresentBackbuffer, iPxPos, vp[2][2]);
+}
+
+#endif // FFX_FRAMEINTERPOLATION_DEBUG_VIEW_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h
new file mode 100644
index 000000000000..03adc96f3a88
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h
@@ -0,0 +1,146 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_DISOCCLUSION_MASK_H
+#define FFX_FRAMEINTERPOLATION_DISOCCLUSION_MASK_H
+
+// Upper end of the lerp in ComputeSampleDepthClip. With the value 1.0 that
+// lerp always evaluates to 1.
+FFX_STATIC const FfxFloat32 DepthClipBaseScale = 1.0f;
+
+// Returns a depth-clip factor for one previous-depth sample, scaled by its
+// bilinear weight. The factor is 1 when the current depth is not farther
+// than the previous one (fDepthDiff <= 0), otherwise
+// saturate(required separation / depth difference).
+// iPxSamplePos is not read. Not called from within this header.
+FfxFloat32 ComputeSampleDepthClip(FfxInt32x2 iPxSamplePos, FfxFloat32 fPreviousDepth, FfxFloat32 fPreviousDepthBilinearWeight, FfxFloat32 fCurrentDepthViewSpace)
+{
+    FfxFloat32 fPrevNearestDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fPreviousDepth);
+
+    // Depth separation logic ref: See "Minimum Triangle Separation for Correct Z-Buffer Occlusion"
+    // Intention: worst case of formula in Figure4 combined with Ksep factor in Section 4
+    const FfxFloat32 fHalfViewportWidth = RenderSize().x * 0.5f;
+    FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+    // WARNING: Ksep only works with reversed-z with infinite projection.
+    const FfxFloat32 Ksep = 1.37e-05f;
+    FfxFloat32 fRequiredDepthSeparation = Ksep * fDepthThreshold * TanHalfFoV() * fHalfViewportWidth;
+    FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+
+    FfxFloat32 fDepthClipFactor = (fDepthDiff > 0) ? ffxSaturate(fRequiredDepthSeparation / fDepthDiff) : 1.0f;
+
+    return fPreviousDepthBilinearWeight * fDepthClipFactor * ffxLerp(1.0f, DepthClipBaseScale, ffxSaturate(fDepthDiff * fDepthDiff));
+}
+
+// Loads a depth value at iSamplePos displaced by the distortion field
+// (converted to a pixel offset):
+//   estimatedIndex == 0 -> reconstructed depth of the previous frame
+//   estimatedIndex == 1 -> dilated depth
+//   anything else       -> 0
+FfxFloat32 LoadEstimatedDepth(FfxUInt32 estimatedIndex, FfxInt32x2 iSamplePos)
+{
+    const FfxFloat32x2 fUv = FfxFloat32x2(iSamplePos + 0.5f) / RenderSize();
+    const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUv);
+    FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize());
+
+    if (estimatedIndex == 0)
+    {
+        return LoadReconstructedDepthPreviousFrame(iSamplePos + iDistortionPixelOffset);
+    }
+    else if (estimatedIndex == 1)
+    {
+        return LoadDilatedDepth(iSamplePos + iDistortionPixelOffset);
+    }
+
+    return 0;
+}
+
+// Compares fCurrentDepthSample against the four bilinear taps of the selected
+// estimated depth around fUvSample. Only taps that are on screen, exceed
+// fReconstructedDepthBilinearWeightThreshold and have a positive view-space
+// depth difference take part. Returns saturate(1 - passed / total), where
+// "passed" is the weight of taps whose required separation is at least the
+// depth difference, or 0 when no tap took part.
+FfxFloat32 ComputeDepthClip(FfxUInt32 estimatedIndex, FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
+{
+    FfxFloat32 fCurrentDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fCurrentDepthSample);
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
+
+    // NOTE(review): fDilatedSum is never read or written after this line.
+    FfxFloat32 fDilatedSum = 0.0f;
+    FfxFloat32 fDepth = 0.0f;
+    FfxFloat32 fWeightSum = 0.0f;
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, RenderSize())) {
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+
+                const FfxFloat32 fPrevDepthSample = LoadEstimatedDepth(estimatedIndex, iSamplePos);
+                const FfxFloat32 fPrevNearestDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fPrevDepthSample);
+
+                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+
+                if (fDepthDiff > 0.0f) {
+
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
+#else
+                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
+#endif
+
+                    // View-space positions of the screen centre and the (0, 0)
+                    // corner at the chosen plane depth; their length ratio is Kfov.
+                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
+                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
+
+                    // NOTE(review): despite its name this holds the length of the
+                    // full RenderSize() vector, not half the viewport width.
+                    const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()));
+                    const FfxFloat32 fDepthThreshold = ffxMin(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+                    const FfxFloat32 Ksep = 1.37e-05f;
+                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
+                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;
+
+                    // NOTE(review): fResolutionFactor and fPower are computed but
+                    // never used below.
+                    const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f)));
+                    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
+
+                    // Binary per-tap test: the tap's weight counts as "passed" when
+                    // the required separation is at least the depth difference.
+                    fDepth += FfxFloat32((fRequiredDepthSeparation / fDepthDiff) >= 1.0f) * fWeight;
+                    fWeightSum += fWeight;
+                }
+            }
+        }
+    }
+
+    return (fWeightSum > 0.0f) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
+}
+
+// Pass entry point for one render-resolution pixel. Stores a two-channel mask:
+//   x - result for the lookup along +motion vector against estimated depth 0
+//       (reconstructed previous-frame depth)
+//   y - result for the lookup along -motion vector against estimated depth 1
+//       (dilated depth)
+// Each channel is 1 - ComputeDepthClip(...), binarised against
+// FFX_FRAMEINTERPOLATION_EPSILON, and then forced to 1 when the position
+// displaced by twice the motion vector is off screen.
+void computeDisocclusionMask(FfxInt32x2 iPxPos)
+{
+    FfxFloat32 fDilatedDepth = LoadEstimatedInterpolationFrameDepth(iPxPos);
+
+    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
+    // NOTE(review): fCurrentDepthViewSpace is not used in this function.
+    FfxFloat32 fCurrentDepthViewSpace = ConvertFromDeviceDepthToViewSpace(fDilatedDepth);
+
+    VectorFieldEntry gameMv;
+    LoadInpaintedGameFieldMv(fDepthUv, gameMv);
+
+    const FfxFloat32 fDepthClipInterpolatedToPrevious = 1.0f - ComputeDepthClip(0, fDepthUv + gameMv.fMotionVector, fDilatedDepth);
+    const FfxFloat32 fDepthClipInterpolatedToCurrent = 1.0f - ComputeDepthClip(1, fDepthUv - gameMv.fMotionVector, fDilatedDepth);
+    FfxFloat32x2 fDisocclusionMask = FfxFloat32x2(fDepthClipInterpolatedToPrevious, fDepthClipInterpolatedToCurrent);
+
+    // Binarise: each channel becomes 1 when it is >= epsilon, else 0.
+    fDisocclusionMask = FfxFloat32x2(FFX_GREATER_THAN_EQUAL(fDisocclusionMask, ffxBroadcast2(FFX_FRAMEINTERPOLATION_EPSILON)));
+
+    // Avoid false disocclusion if primary game vector pointer outside screen area
+    const FfxFloat32x2 fSrcMotionVector = gameMv.fMotionVector * 2.0f;
+    const FfxInt32x2 iSamplePosPrevious = FfxInt32x2((fDepthUv + fSrcMotionVector) * RenderSize());
+    fDisocclusionMask.x = ffxSaturate(fDisocclusionMask.x + FfxFloat32(!IsOnScreen(iSamplePosPrevious, RenderSize())));
+
+    const FfxInt32x2 iSamplePosCurrent = FfxInt32x2((fDepthUv - fSrcMotionVector) * RenderSize());
+    fDisocclusionMask.y = ffxSaturate(fDisocclusionMask.y + FfxFloat32(!IsOnScreen(iSamplePosCurrent, RenderSize())));
+
+    StoreDisocclusionMask(iPxPos, fDisocclusionMask);
+
+}
+
+#endif // FFX_FRAMEINTERPOLATION_DISOCCLUSION_MASK_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h
new file mode 100644
index 000000000000..574e3dff2670
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h
@@ -0,0 +1,123 @@
+// This
file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_FRAMEINTERPOLATION_GAME_MOTION_VECTOR_FIELD_H
+#define FFX_FRAMEINTERPOLATION_GAME_MOTION_VECTOR_FIELD_H
+
+// Maps a view-space depth to an integer priority: with d = depth^0.33 the
+// result is (1 - d / (1 + d)) * PRIORITY_HIGH_MAX, so larger depths give
+// smaller priorities. The result is never below 1.
+FfxUInt32 getPriorityFactorFromViewSpaceDepth(FfxFloat32 fViewSpaceDepthInMeters)
+{
+    fViewSpaceDepthInMeters = ffxPow(fViewSpaceDepthInMeters, 0.33f);
+
+    FfxUInt32 uPriorityFactor = FfxUInt32(FfxFloat32(1 - (fViewSpaceDepthInMeters * (1.0f / (1.0f + fViewSpaceDepthInMeters)))) * PRIORITY_HIGH_MAX);
+
+    return ffxMax(1, uPriorityFactor);
+}
+
+// Pass entry point for one render-resolution pixel: scatters that pixel's
+// halved game motion vector into the game motion-vector field.
+//  1. Primary write: the packed entry (primary flag true) goes to the four
+//     bilinear taps around uv + half motion vector.
+//  2. Secondary writes (only when the half vector is longer than epsilon in
+//     pixels): entries with the primary flag false are written while stepping
+//     from that location against the motion direction.
+void computeGameFieldMvs(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32x2 fUvInScreenSpace = (FfxFloat32x2(iPxPos) + 0.5f) / RenderSize();
+
+    // Depth and motion vector are read at the pixel displaced by the
+    // distortion field.
+    const FfxFloat32x2 fDistortionFieldUv = SampleDistortionField(fUvInScreenSpace);
+    FfxInt32x2 iDistortionPixelOffset = FfxInt32x2(fDistortionFieldUv.xy * RenderSize());
+
+    // Remap the screen-space UV into the interpolation rectangle of the display.
+    const FfxFloat32x2 fUvInInterpolationRectStart = FfxFloat32x2(InterpolationRectBase()) / DisplaySize();
+    const FfxFloat32x2 fUvLetterBoxScale = FfxFloat32x2(InterpolationRectSize()) / DisplaySize();
+    const FfxFloat32x2 fUvInInterpolationRect = fUvInInterpolationRectStart + fUvInScreenSpace * fUvLetterBoxScale;
+
+    const FfxFloat32 fDepthSample = LoadDilatedDepth(iPxPos + iDistortionPixelOffset);
+    const FfxFloat32x2 fGameMotionVector = LoadDilatedMotionVector(iPxPos + iDistortionPixelOffset);
+    const FfxFloat32x2 fMotionVectorHalf = fGameMotionVector * 0.5f;
+    const FfxFloat32x2 fInterpolatedLocationUv = fUvInScreenSpace + fMotionVectorHalf;
+
+    const FfxFloat32 fViewSpaceDepth = ConvertFromDeviceDepthToViewSpace(fDepthSample);
+    const FfxUInt32 uHighPriorityFactorPrimary = getPriorityFactorFromViewSpaceDepth(fViewSpaceDepth);
+
+    // pixel position in current frame + Game Motion Vector -> pixel position in previous frame
+    FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUvInInterpolationRect+ fGameMotionVector * fUvLetterBoxScale).xyz; //returns color of current frame's pixel in previous frame buffer
+    FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUvInInterpolationRect).xyz; // returns color of current frame's pixel in current frame buffer
+    FfxFloat32 prevLuma = 0.001f + RawRGBToLuminance(prevBackbufferCol);
+    FfxFloat32 currLuma = 0.001f + RawRGBToLuminance(curBackbufferCol);
+
+    // Low priority: luminance similarity (min / max) scaled to PRIORITY_LOW_MAX,
+    // forced to 0 when the displaced UV falls outside. The exponent 1.0f / 1.0f
+    // leaves the ratio unchanged.
+    FfxUInt32 uLowPriorityFactor = FfxUInt32(ffxRound(ffxPow(MinDividedByMax(prevLuma, currLuma), 1.0f / 1.0f) * PRIORITY_LOW_MAX))
+        * FfxUInt32(IsUvInside(fUvInInterpolationRect + fGameMotionVector * fUvLetterBoxScale));
+
+    // Update primary motion vectors
+    {
+        const FfxUInt32x2 packedVectorPrimary = PackVectorFieldEntries(true, uHighPriorityFactorPrimary, uLowPriorityFactor, fMotionVectorHalf);
+
+        BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fInterpolatedLocationUv, RenderSize());
+        for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+        {
+            const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+            const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+            if (IsOnScreen(iSamplePos, RenderSize()))
+            {
+                UpdateGameMotionVectorField(iSamplePos, packedVectorPrimary);
+            }
+        }
+    }
+
+    // Update secondary vectors
+    // Main purpose of secondary vectors is to improve quality of inpainted vectors
+    const FfxBoolean bWriteSecondaryVectors = length(fMotionVectorHalf * RenderSize()) > FFX_FRAMEINTERPOLATION_EPSILON;
+    if (bWriteSecondaryVectors)
+    {
+        FfxBoolean bWriteSecondary = true;
+        FfxUInt32 uNumPrimaryHits = 0;
+        // Step length is length(1 / RenderSize()); the walk ends after
+        // min(|half motion vector|, |(0.5, 0.5)|) in UV units.
+        const FfxFloat32 fSecondaryStepScale = length(1.0f / RenderSize());
+        const FfxFloat32x2 fStepMv = normalize(fGameMotionVector);
+        const FfxFloat32 fBreakDist = ffxMin(length(fMotionVectorHalf), length(FfxFloat32x2(0.5f, 0.5f)));
+
+        for (FfxFloat32 fMvScale = fSecondaryStepScale; fMvScale <= fBreakDist && bWriteSecondary; fMvScale += fSecondaryStepScale)
+        {
+            const FfxFloat32x2 fSecondaryLocationUv = fInterpolatedLocationUv - fStepMv * fMvScale;
+            BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fSecondaryLocationUv, RenderSize());
+
+            // Reverse depth prio for secondary vectors
+            FfxUInt32 uHighPriorityFactorSecondary = ffxMax(1, PRIORITY_HIGH_MAX - uHighPriorityFactorPrimary);
+
+            // Low priority here is how closely the step direction points at
+            // the screen centre (clamped dot product), scaled to PRIORITY_LOW_MAX.
+            const FfxFloat32x2 fToCenter = normalize(FfxFloat32x2(0.5f, 0.5f) - fSecondaryLocationUv);
+            uLowPriorityFactor = FfxUInt32(ffxMax(0.0f, dot(fToCenter, fStepMv)) * PRIORITY_LOW_MAX);
+            const FfxUInt32x2 packedVectorSecondary = PackVectorFieldEntries(false, uHighPriorityFactorSecondary, uLowPriorityFactor, fMotionVectorHalf);
+
+            // Only write secondary mvs to single bilinear location
+            for (FfxInt32 iSampleIndex = 0; iSampleIndex < 1; iSampleIndex++)
+            {
+                const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+                const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+                // Leaving the screen ends the walk.
+                bWriteSecondary = bWriteSecondary && IsOnScreen(iSamplePos, RenderSize());
+
+                if (bWriteSecondary)
+                {
+                    const FfxUInt32 uExistingVectorFieldEntry = UpdateGameMotionVectorFieldEx(iSamplePos, packedVectorSecondary);
+
+                    // Count writes that landed on an existing primary entry and
+                    // stop once there have been more than three.
+                    uNumPrimaryHits += FfxUInt32(PackedVectorFieldEntryIsPrimary(uExistingVectorFieldEntry));
+                    bWriteSecondary = bWriteSecondary && (uNumPrimaryHits <= 3);
+                }
+            }
+        }
+    }
+}
+
+#endif // FFX_FRAMEINTERPOLATION_GAME_MOTION_VECTOR_FIELD_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h
new file mode 100644
index 000000000000..7ad3630e0eab
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h
@@ -0,0 +1,150 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_FRAMEINTERPOLATION_INPAINTING_H
+#define FFX_FRAMEINTERPOLATION_INPAINTING_H
+
+// Bilinearly samples one inpainting-pyramid level at fUv.
+//   fUv       - sample position in UV space.
+//   iMipLevel - level passed to LoadInpaintingPyramid.
+//   iTexSize  - size of that level, used for tap placement and bounds checks.
+// Returns (sum of rgb * weight, sum of weight). Taps that are off screen or
+// whose w channel is not positive contribute nothing.
+FfxFloat32x4 ComputeInpaintingLevel(FfxFloat32x2 fUv, const FfxInt32 iMipLevel, const FfxInt32x2 iTexSize)
+{
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv, iTexSize);
+
+    FfxFloat32x4 fColor = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, iTexSize)) {
+
+            FfxFloat32x4 fSample = LoadInpaintingPyramid(iMipLevel, iSamplePos);
+
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex] * FfxFloat32(fSample.w > 0.0f);
+
+            fColor += FfxFloat32x4(fSample.rgb * fWeight, fWeight);
+        }
+    }
+
+    return fColor;
+}
+
+// Computes the inpainted colour for a display-resolution pixel by blending
+// ten pyramid levels. Each level is normalised by its own weight and then
+// weighted by (1 - level / 10)^3 times that weight, so finer levels dominate.
+// Returns black when no level contributes anything.
+FfxFloat32x3 ComputeInpainting(FfxInt32x2 iPxPos)
+{
+    FfxFloat32x2 fUv = (iPxPos + 0.5f) / (DisplaySize());
+
+    FfxFloat32x4 fColor = FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+    FfxInt32x2 iTexSize = DisplaySize();
+
+    for (FfxInt32 iMipLevel = 0; iMipLevel < 10; iMipLevel++) {
+
+        iTexSize /= 2;
+
+        FfxFloat32x4 fMipColor = ComputeInpaintingLevel(fUv, iMipLevel, iTexSize);
+
+        if (fMipColor.w > 0.0f) {
+            const FfxFloat32x3 fNormalizedMipColor = fMipColor.rgb / fMipColor.w;
+            const FfxFloat32 fMipWeight = ffxPow(1.0f - iMipLevel / 10.0f, 3.0f) * fMipColor.w;
+
+            fColor += FfxFloat32x4(fNormalizedMipColor, 1.0f) * fMipWeight;
+        }
+    }
+
+    // Guard the normalisation: if no level contributed, fColor.w is 0 and the
+    // division would be 0/0, feeding NaN into the caller's lerp and the output.
+    return (fColor.w > 0.0f) ? fColor.rgb / fColor.w : FfxFloat32x3(0.0f, 0.0f, 0.0f);
+}
+
+// Debug overlay: sets green to 1 in the 16 leftmost columns and adds
+// GetDebugBarColor() in the columns right of DisplaySize().x - 16.
+// Sets bWriteColor when it touched the colour.
+void drawDebugTearLines(FfxInt32x2 iPxPos, inout FfxFloat32x3 fColor, inout FfxBoolean bWriteColor)
+{
+    if (iPxPos.x < 16)
+    {
+        fColor.g = 1.f;
+        bWriteColor = true;
+    }
+    else if (iPxPos.x > DisplaySize().x - 16)
+    {
+        fColor += GetDebugBarColor();
+        bWriteColor = true;
+    }
+
+}
+
+// Debug overlay: rows [0, 32) get red = 1 while Reset() is true, rows
+// [32, 64) get blue = 1 while HasSceneChanged() is true.
+// Sets bWriteColor when it touched the colour.
+void drawDebugResetIndicators(FfxInt32x2 iPxPos, inout FfxFloat32x3 fColor, inout FfxBoolean bWriteColor)
+{
+    if (iPxPos.y < 32 && Reset())
+    {
+        fColor.r = 1.f;
+        bWriteColor = true;
+    }
+    // >= 32 so that row 32 belongs to the second band; with a strict
+    // comparison that row was covered by neither band.
+    else if (iPxPos.y >= 32 && iPxPos.y < 64 && HasSceneChanged())
+    {
+        fColor.b = 1.f;
+        bWriteColor = true;
+    }
+}
+
+// Pass entry point for one display-resolution pixel. Reads the interpolated
+// colour, whose w channel is used as the inpainting weight, and:
+//  - blends in the inpainted colour when that weight exceeds epsilon;
+//  - when GetHUDLessAttachedFactor() == 1, blends towards the present
+//    backbuffer where it differs from the current backbuffer, by the static
+//    content factor;
+//  - draws the optional debug overlays selected by GetDispatchFlags().
+// The pixel is written back (alpha 1) only if one of these steps changed it.
+void computeInpainting(FfxInt32x2 iPxPos)
+{
+    FfxBoolean bWriteColor = false;
+    FfxFloat32x4 fInterpolatedColor = RWLoadFrameinterpolationOutput(iPxPos);
+
+    const FfxFloat32 fInPaintingWeight = fInterpolatedColor.w;
+    if (fInPaintingWeight > FFX_FRAMEINTERPOLATION_EPSILON)
+    {
+        fInterpolatedColor.rgb = ffxLerp(fInterpolatedColor.rgb, ComputeInpainting(iPxPos) * FfxFloat32(DisplaySize().x > 0), fInPaintingWeight);
+        bWriteColor = true;
+    }
+
+    if (GetHUDLessAttachedFactor() == 1)
+    {
+        const FfxFloat32x3 fCurrentInterpolationSource = LoadCurrentBackbuffer(iPxPos).rgb;
+        const FfxFloat32x3 fPresentColor = LoadPresentBackbuffer(iPxPos).rgb;
+
+        // Only pixels where the two buffers differ in any channel are considered.
+        if (any(FFX_GREATER_THAN(abs(fCurrentInterpolationSource - fPresentColor), FfxFloat32x3(0.0, 0.0, 0.0))))
+        {
+            const FfxFloat32 fStaticFactor = CalculateStaticContentFactor(RawRGBToLinear(fCurrentInterpolationSource), RawRGBToLinear(fPresentColor));
+
+            if (fStaticFactor > FFX_FRAMEINTERPOLATION_EPSILON)
+            {
+                fInterpolatedColor.rgb = ffxLerp(fInterpolatedColor.rgb, fPresentColor, fStaticFactor);
+                bWriteColor = true;
+            }
+        }
+    }
+
+    if ((GetDispatchFlags() & FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_TEAR_LINES) != 0)
+    {
+        drawDebugTearLines(iPxPos, fInterpolatedColor.rgb, bWriteColor);
+    }
+
+    if ((GetDispatchFlags() & FFX_FRAMEINTERPOLATION_DISPATCH_DRAW_DEBUG_RESET_INDICATORS) != 0)
+    {
+        drawDebugResetIndicators(iPxPos, fInterpolatedColor.rgb, bWriteColor);
+    }
+
+    if (bWriteColor)
+    {
+        StoreFrameinterpolationOutput(iPxPos, FfxFloat32x4(fInterpolatedColor.rgb, 1.0f));
+    }
+
+}
+
+#endif // FFX_FRAMEINTERPOLATION_INPAINTING_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h
new file mode 100644
index
000000000000..8432132604d1 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h @@ -0,0 +1,123 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_FRAMEINTERPOLATION_OPTICAL_FLOW_VECTOR_FIELD_H
+#define FFX_FRAMEINTERPOLATION_OPTICAL_FLOW_VECTOR_FIELD_H
+
+/// Scatter one optical-flow vector into the optical-flow motion vector field.
+///
+/// The vector is halved, packed together with a high priority derived from its
+/// length and a low priority derived from the luminance ratio between the
+/// previous and current backbuffer, and written to the four texels of the
+/// bilinear footprint around fUv + half vector. Nothing is written when the
+/// high priority evaluates to zero.
+///
+/// @param [in] dtID                Texel position in the optical-flow grid.
+/// @param [in] fOpticalFlowVector  Flow vector in UV units (it is added directly to fUv).
+void computeOpticalFlowFieldMvs(FfxUInt32x2 dtID, FfxFloat32x2 fOpticalFlowVector)
+{
+    FfxFloat32x2 fUv = FfxFloat32x2(FfxFloat32x2(dtID)+0.5f) / GetOpticalFlowSize2();
+
+    const FfxFloat32 scaleFactor = 1.0f;
+    FfxFloat32x2 fMotionVectorHalf = fOpticalFlowVector * 0.5f;
+
+    // pixel position in current frame + fOpticalFlowVector-> pixel position in previous frame
+    FfxFloat32x3 prevBackbufferCol = SamplePreviousBackbuffer(fUv + fOpticalFlowVector).xyz; // returns previous backbuffer color of current frame pixel position in previous frame
+    FfxFloat32x3 curBackbufferCol = SampleCurrentBackbuffer(fUv).xyz; // returns current backbuffer color at current frame pixel position
+
+    // The small bias keeps both luminances strictly positive before they are divided.
+    FfxFloat32 prevLuma = 0.001f + RawRGBToLuminance(prevBackbufferCol);
+    FfxFloat32 currLuma = 0.001f + RawRGBToLuminance(curBackbufferCol);
+
+    // Vectors of at most one pixel get priority 0 and are dropped below.
+    FfxFloat32 fVelocity = length(fOpticalFlowVector * InterpolationRectSize());
+    FfxUInt32 uHighPriorityFactor = FfxUInt32(fVelocity > 1.0f) * FfxUInt32(ffxSaturate(fVelocity / length(InterpolationRectSize() * 0.05f)) * PRIORITY_HIGH_MAX);
+
+    if(uHighPriorityFactor > 0) {
+        FfxUInt32 uLowPriorityFactor = FfxUInt32(ffxRound(ffxPow(MinDividedByMax(prevLuma, currLuma), 1.0f / 1.0f) * PRIORITY_LOW_MAX))
+            * FfxUInt32(IsUvInside(fUv + fOpticalFlowVector));
+
+        // Write the packed half vector into the vector field.
+        // Push to all pixels having some contribution if reprojection is using bilinear logic.
+
+        const FfxUInt32x2 packedVectorPrimary = PackVectorFieldEntries(true, uHighPriorityFactor, uLowPriorityFactor, fMotionVectorHalf);
+
+        BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUv + fMotionVectorHalf, GetOpticalFlowSize2());
+        for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++)
+        {
+            const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+            const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+            if (IsOnScreen(iSamplePos, GetOpticalFlowSize2()))
+            {
+                UpdateOpticalflowMotionVectorField(iSamplePos, packedVectorPrimary);
+            }
+        }
+    }
+}
+
+/// Filter the optical flow in a 3x3 neighbourhood and scatter the result.
+///
+/// Pass 1 builds a length-weighted average of the neighbouring flow vectors.
+/// Pass 2 re-averages the same neighbours, weighting each by how well it aligns
+/// with that average (dot product raised to 1.25) times the same length factor.
+/// The filtered vector is handed to computeOpticalFlowFieldMvs.
+///
+/// @param [in] iPxPos  Texel position in the optical-flow grid.
+void computeOpticalFlowVectorField(FfxInt32x2 iPxPos)
+{
+    FfxFloat32x2 fOpticalFlowVector = FfxFloat32x2(0.0, 0.0);
+    FfxFloat32x2 fOpticalFlowVector3x3Avg = FfxFloat32x2(0.0, 0.0);
+    FfxInt32 size = 1;
+    FfxFloat32 sw = 0.0f;
+
+    for(FfxInt32 y = -size; y <= size; y++) {
+        for(FfxInt32 x = -size; x <= size; x++) {
+
+            FfxInt32x2 samplePos = iPxPos + FfxInt32x2(x, y);
+
+            FfxFloat32x2 vs = LoadOpticalFlow(samplePos);
+            // NOTE(review): the confidence is loaded but not used by the weights below.
+            FfxFloat32 fConfidenceFactor = ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, LoadOpticalFlowConfidence(samplePos));
+
+            // Weight is zero for vectors of at most one pixel or of 512 pixels and longer.
+            FfxFloat32 len = length(vs * InterpolationRectSize());
+            FfxFloat32 len_factor = ffxMax(0.0f, 512.0f - len) * FfxFloat32(len > 1.0f);
+            FfxFloat32 w = len_factor;
+
+            fOpticalFlowVector3x3Avg += vs * w;
+
+            sw += w;
+        }
+    }
+
+    // When every neighbour had weight zero the average stays (0, 0) instead of
+    // becoming 0 / 0; all pass-2 weights are zero in that case either way.
+    if (sw > 0.0f)
+    {
+        fOpticalFlowVector3x3Avg /= sw;
+    }
+
+    sw = 0.0f;
+    for(FfxInt32 y = -size; y <= size; y++) {
+        for(FfxInt32 x = -size; x <= size; x++) {
+
+            FfxInt32x2 samplePos = iPxPos + FfxInt32x2(x, y);
+
+            FfxFloat32x2 vs = LoadOpticalFlow(samplePos);
+
+            // NOTE(review): the confidence is loaded but not used by the weights below.
+            FfxFloat32 fConfidenceFactor = ffxMax(FFX_FRAMEINTERPOLATION_EPSILON, LoadOpticalFlowConfidence(samplePos));
+            FfxFloat32 len = length(vs * InterpolationRectSize());
+            FfxFloat32 len_factor = ffxMax(0.0f, 512.0f - len) * FfxFloat32(len > 1.0f);
+
+            // Clamp before the power: pow() with a negative base is undefined in GLSL and
+            // NaN in HLSL. Vectors pointing away from the average still get weight zero.
+            FfxFloat32 w = ffxPow(ffxMax(0.0f, dot(fOpticalFlowVector3x3Avg, vs)), 1.25f) * len_factor;
+
+            fOpticalFlowVector += vs * w;
+            sw += w;
+        }
+    }
+
+    if (sw > FFX_FRAMEINTERPOLATION_EPSILON)
+    {
+        fOpticalFlowVector /= sw;
+    }
+
+    computeOpticalFlowFieldMvs(iPxPos, fOpticalFlowVector);
+}
+
+#endif // FFX_FRAMEINTERPOLATION_OPTICAL_FLOW_VECTOR_FIELD_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h
new file mode 100644
index 000000000000..e92a57900a6b
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h
@@ -0,0 +1,123 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+#define FFX_FRAMEINTERPOLATION_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+
+/// Scatter a depth value to the position its pixel maps to after adding the motion vector.
+///
+/// The motion vector is zeroed unless it spans more than 0.1 display pixels.
+/// The depth is then pushed to every texel of the bilinear footprint whose
+/// weight exceeds fReconstructedDepthBilinearWeightThreshold and which lies
+/// inside iPxDepthSize.
+///
+/// @param [in] iPxPos         Source pixel position.
+/// @param [in] fDepth         Depth value to scatter.
+/// @param [in] fMotionVector  Motion vector in UV units (it is added directly to the UV).
+/// @param [in] iPxDepthSize   Size used to build the UV and for the on-screen test.
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
+{
+    // 1 for a motion vector longer than 0.1 display pixels, 0 otherwise.
+    const FfxFloat32 fMotionMask = FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f);
+    fMotionVector *= fMotionMask;
+
+    const FfxFloat32x2 fReprojectedUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize + fMotionVector;
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+
+    // Project current depth into previous frame locations.
+    // Push to all pixels having some contribution if reprojection is using bilinear logic.
+    for (FfxInt32 iTap = 0; iTap < 4; iTap++) {
+
+        const FfxFloat32 fTapWeight = bilinearInfo.fWeights[iTap];
+        const FfxInt32x2 iStorePos = bilinearInfo.iBasePos + bilinearInfo.iOffsets[iTap];
+
+        if (fTapWeight > fReconstructedDepthBilinearWeightThreshold && IsOnScreen(iStorePos, iPxDepthSize)) {
+            UpdateReconstructedDepthPreviousFrame(iStorePos, fDepth);
+        }
+    }
+}
+
+/// Find the nearest depth in the 3x3 neighbourhood of a pixel.
+///
+/// "Nearest" means the largest value when FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+/// is set and the smallest value otherwise. The centre pixel is always a
+/// candidate; the other eight are considered only when they lie inside iPxSize.
+///
+/// @param [in]  iPxPos              Centre pixel.
+/// @param [in]  iPxSize             Size used for the on-screen test.
+/// @param [out] fNearestDepth       Depth of the selected neighbour.
+/// @param [out] fNearestDepthCoord  Position of the selected neighbour.
+void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
+{
+    const FfxInt32 iSampleCount = 9;
+    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+        FfxInt32x2(+0, +0),
+        FfxInt32x2(+1, +0),
+        FfxInt32x2(+0, +1),
+        FfxInt32x2(+0, -1),
+        FfxInt32x2(-1, +0),
+        FfxInt32x2(-1, +1),
+        FfxInt32x2(+1, +1),
+        FfxInt32x2(-1, -1),
+        FfxInt32x2(+1, -1),
+    };
+
+    // All loads are issued in their own loop so the shader compiler can batch them.
+    FfxFloat32 fTapDepth[9];
+    FfxInt32 iTap = 0;
+    FFX_UNROLL
+    for (iTap = 0; iTap < iSampleCount; ++iTap) {
+        fTapDepth[iTap] = LoadInputDepth(iPxPos + iSampleOffsets[iTap]);
+    }
+
+    // Start from the centre tap, then let every on-screen neighbour challenge it.
+    fNearestDepthCoord = iPxPos;
+    fNearestDepth = fTapDepth[0];
+    FFX_UNROLL
+    for (iTap = 1; iTap < iSampleCount; ++iTap) {
+
+        const FfxInt32x2 iTapPos = iPxPos + iSampleOffsets[iTap];
+        if (IsOnScreen(iTapPos, iPxSize)) {
+
+#if FFX_FRAMEINTERPOLATION_OPTION_INVERTED_DEPTH
+            const FfxBoolean bIsNearer = fTapDepth[iTap] > fNearestDepth;
+#else
+            const FfxBoolean bIsNearer = fTapDepth[iTap] < fNearestDepth;
+#endif
+            if (bIsNearer) {
+                fNearestDepthCoord = iTapPos;
+                fNearestDepth = fTapDepth[iTap];
+            }
+        }
+    }
+}
+
+/// Per-pixel entry point: dilate depth and motion vectors, then reproject the depth.
+///
+/// Picks the nearest depth in the 3x3 neighbourhood, loads the motion vector
+/// stored at that neighbour, writes both to the dilated targets and scatters
+/// the depth through ReconstructPrevDepth.
+///
+/// @param [in] iPxLrPos  Pixel position at render resolution.
+void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
+{
+    FfxFloat32 fNearestDepth;
+    FfxInt32x2 iNearestCoord;
+    FindNearestDepth(iPxLrPos, RenderSize(), fNearestDepth, iNearestCoord);
+
+    // NOTE(review): iSamplePos is computed in both branches but not read afterwards.
+#if FFX_FRAMEINTERPOLATION_OPTION_LOW_RES_MOTION_VECTORS
+    FfxInt32x2 iSamplePos = iPxLrPos;
+    FfxInt32x2 iMotionVectorCoord = iNearestCoord;
+#else
+    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxLrPos);
+    FfxInt32x2 iMotionVectorCoord = ComputeHrPosFromLrPos(iNearestCoord);
+#endif
+
+    FfxFloat32x2 fNearestMotionVector = LoadInputMotionVector(iMotionVectorCoord);
+
+    StoreDilatedDepth(iPxLrPos, fNearestDepth);
+    StoreDilatedMotionVectors(iPxLrPos, fNearestMotionVector);
+
+    ReconstructPrevDepth(iPxLrPos, fNearestDepth, fNearestMotionVector, RenderSize());
+}
+
+
+#endif //!defined( FFX_FRAMEINTERPOLATION_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h
new file mode 100644
index 000000000000..d85f67312c59
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h
@@ -0,0 +1,63 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_RECONSTRUCT_PREVIOUS_DEPTH_H
+#define FFX_FRAMEINTERPOLATION_RECONSTRUCT_PREVIOUS_DEPTH_H
+
+/// Scatter a depth value into the reconstructed depth of the interpolated frame.
+///
+/// The depth is pushed to every texel of the bilinear footprint around
+/// uv + fMotionVector whose weight exceeds
+/// fReconstructedDepthBilinearWeightThreshold and which lies inside
+/// RenderSize(). Nothing is written when depthTarget is 0.
+///
+/// @param [in] iPxPos         Source pixel position.
+/// @param [in] depthTarget    Non-zero to enable the write.
+/// @param [in] fDepth         Depth value to scatter.
+/// @param [in] fMotionVector  Motion vector in UV units (it is added directly to the UV).
+/// @param [in] iPxDepthSize   Size used to convert iPxPos into a UV.
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxUInt32 depthTarget, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
+{
+    const FfxFloat32x2 fReprojectedUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize + fMotionVector;
+
+    // Project current depth into previous frame locations.
+    // Push to all pixels having some contribution if reprojection is using bilinear logic.
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+    for (FfxInt32 iTap = 0; iTap < 4; iTap++)
+    {
+        const FfxInt32x2 iStorePos = bilinearInfo.iBasePos + bilinearInfo.iOffsets[iTap];
+        const FfxFloat32 fTapWeight = bilinearInfo.fWeights[iTap];
+
+        // Same three tests as a nested if-chain, evaluated in the same order.
+        if (fTapWeight > fReconstructedDepthBilinearWeightThreshold
+            && IsOnScreen(iStorePos, RenderSize())
+            && depthTarget != 0)
+        {
+            UpdateReconstructedDepthInterpolatedFrame(iStorePos, fDepth);
+        }
+    }
+}
+
+/// Per-pixel entry point: reproject dilated depth half-way along its motion vector.
+///
+/// The dilated depth and motion vector are read at the pixel offset by the
+/// distortion field, and the depth is scattered using half the motion vector.
+///
+/// @param [in] iPxPos  Pixel position at render resolution.
+void reconstructPreviousDepth(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5f)) / RenderSize();
+    const FfxFloat32x2 fDistortion = SampleDistortionField(fUv);
+
+    // Distortion is stored in UV units; convert it to a whole-pixel offset.
+    const FfxInt32x2 iDistortedPos = iPxPos + FfxInt32x2(fDistortion.xy * RenderSize());
+
+    FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iDistortedPos);
+    FfxFloat32 fDilatedDepth = LoadDilatedDepth(iDistortedPos);
+
+    ReconstructPrevDepth(iPxPos, 1, fDilatedDepth, fDilatedMotionVector * 0.5f, RenderSize());
+}
+
+#endif // FFX_FRAMEINTERPOLATION_RECONSTRUCT_PREVIOUS_DEPTH_H
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_resources.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_resources.h
new file mode 100644
index 000000000000..5505f6d15869
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_resources.h
@@ -0,0 +1,95 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_RESOURCES_H
+#define FFX_FRAMEINTERPOLATION_RESOURCES_H
+
+// Resource and constant-buffer identifiers of the frame interpolation effect.
+// They are only defined when the header is compiled as FFX_CPU or FFX_GPU.
+#if defined(FFX_CPU) || defined(FFX_GPU)
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_NULL 0
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OUTPUT 1
+// Identifier 2 is not assigned; its former name is kept commented out.
+//#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_BACKBUFFER 2
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PREVIOUS_INTERPOLATION_SOURCE 3
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_CURRENT_INTERPOLATION_SOURCE 4
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEPTH 5
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_MOTION_VECTORS 6
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_2 7
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_3 8
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_4 9
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_DEPTH 10
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 11
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME 12
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME 13
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISOCCLUSION_MASK 14
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_X 15
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_GAME_MOTION_VECTOR_FIELD_Y 16
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X 17
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y 18
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR 19
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_5 20
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_CONFIDENCE 21
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_GLOBAL_MOTION 22
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCENE_CHANGE_DETECTION 23
+
+// NOTE(review): no identifier is defined for value 24; the numbering resumes at 25.
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_6 25
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_RESERVED_7 26
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEBUG_OUTPUT_0 27
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEBUG_OUTPUT_1 28
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 29
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_MASK 30
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_PRESENT_BACKBUFFER 31
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNTERS 32
+// The pyramid and its thirteen mip levels occupy identifiers 33..45; the
+// un-suffixed name is an alias of mip level 0.
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID 33 // same as FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_0 33
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_1 34
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_2 35
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_3 36
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_4 37
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_5 38
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_6 39
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_7 40
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_8 41
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_9 42
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_10 43
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_11 44
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_INPAINTING_PYRAMID_MIPMAP_12 45
+
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DEFAULT_DISTORTION_FIELD 46
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_DISTORTION_FIELD 47
+
+// One past the highest identifier above (47).
+#define FFX_FRAMEINTERPOLATION_RESOURCE_IDENTIFIER_COUNT 48
+
+// Constant-buffer identifiers.
+#define FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_IDENTIFIER 0
+#define FFX_FRAMEINTERPOLATION_INPAINTING_PYRAMID_CONSTANTBUFFER_IDENTIFIER 1
+#define FFX_FRAMEINTERPOLATION_CONSTANTBUFFER_COUNT 2
+
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif //!defined( FFX_FRAMEINTERPOLATION_RESOURCES_H )
diff --git a/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_setup.h b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_setup.h
new file mode 100644
index 000000000000..60dd32377621
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/frameinterpolation/ffx_frameinterpolation_setup.h
@@ -0,0 +1,50 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FRAMEINTERPOLATION_SETUP_H
+#define FFX_FRAMEINTERPOLATION_SETUP_H
+
+/// Per-pixel entry point of the setup pass.
+///
+/// Every invocation zeroes COUNTER_SPD and clears its texel in both motion
+/// vector fields and in the disocclusion mask. The invocation at (0, 0) also
+/// maintains COUNTER_FRAME_INDEX_SINCE_LAST_RESET: it is set to 0 when Reset()
+/// or HasSceneChanged() is true and incremented by one otherwise.
+///
+/// @param [in] iPxPos  Pixel position handled by this invocation.
+void setupFrameinterpolationResources(FfxInt32x2 iPxPos)
+{
+    // Update reset counters
+    StoreCounter(COUNTER_SPD, 0);
+    if (all(FFX_EQUAL(iPxPos, FfxInt32x2(0, 0))))
+    {
+        // Stays 0 on a reset or scene change; otherwise previous value plus one.
+        FfxUInt32 uFramesSinceReset = 0;
+        if (!(Reset() || HasSceneChanged())) {
+            uFramesSinceReset = RWLoadCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET) + 1;
+        }
+        StoreCounter(COUNTER_FRAME_INDEX_SINCE_LAST_RESET, uFramesSinceReset);
+    }
+
+    // Reset resources
+    StoreGameMotionVectorFieldX(iPxPos, 0);
+    StoreGameMotionVectorFieldY(iPxPos, 0);
+
+    StoreOpticalflowMotionVectorFieldX(iPxPos, 0);
+    StoreOpticalflowMotionVectorFieldY(iPxPos, 0);
+
+    StoreDisocclusionMask(iPxPos, FfxFloat32x2(0.0, 0.0));
+}
+
+#endif // FFX_FRAMEINTERPOLATION_SETUP_H
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr1.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1.h
similarity index 97%
rename from thirdparty/amd-fsr2/shaders/ffx_fsr1.h
rename to thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1.h
index 1ac23cf3de3d..82ebf21fed29 100644
--- a/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
+++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1.h
@@ -1,13 +1,14 @@
 // This file is part of the FidelityFX SDK.
 //
-// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
-//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,9 +20,10 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. -#ifdef __clang__ -#pragma clang diagnostic ignored "-Wunused-variable" -#endif +/// @defgroup FfxGPUFsr1 FidelityFX FSR1 +/// FidelityFX Super Resolution 1 GPU documentation +/// +/// @ingroup FfxGPUEffects /// Setup required constant values for EASU (works on CPU or GPU). /// @@ -36,7 +38,7 @@ /// @param [in] outputSizeInPixelsX The display resolution which the input image gets upscaled to in X dimension. /// @param [in] outputSizeInPixelsY The display resolution which the input image gets upscaled to in Y dimension. /// -/// @ingroup FSR1 +/// @ingroup FfxGPUFsr1 FFX_STATIC void ffxFsrPopulateEasuConstants( FFX_PARAMETER_INOUT FfxUInt32x4 con0, FFX_PARAMETER_INOUT FfxUInt32x4 con1, @@ -102,7 +104,7 @@ FFX_STATIC void ffxFsrPopulateEasuConstants( /// @param [in] inputOffsetInPixelsX The input image offset in the X dimension into the resource containing it (useful for dynamic resolution). /// @param [in] inputOffsetInPixelsY The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution). 
/// -/// @ingroup FSR1 +/// @ingroup FfxGPUFsr1 FFX_STATIC void ffxFsrPopulateEasuConstantsOffset( FFX_PARAMETER_INOUT FfxUInt32x4 con0, FFX_PARAMETER_INOUT FfxUInt32x4 con1, @@ -329,7 +331,7 @@ void ffxFsrEasuFloat( // Normalize with approximation, and cleanup close to zero. FfxFloat32x2 dir2 = dir * dir; FfxFloat32 dirR = dir2.x + dir2.y; - FfxUInt32 zro = dirR < FfxFloat32(1.0 / 32768.0); + FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0); dirR = ffxApproximateReciprocalSquareRoot(dirR); dirR = zro ? FfxFloat32(1.0) : dirR; dir.x = zro ? FfxFloat32(1.0) : dir.x; @@ -382,7 +384,7 @@ void ffxFsrEasuFloat( fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w)); // n // Normalize and dering. - pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW)))); + pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(ffxReciprocal(aW)))); } #endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT) @@ -526,8 +528,8 @@ void FsrEasuH( dirPY, lenP, ppp, - FfxUInt32(true), - FfxUInt32(false), + FfxBoolean(true), + FfxBoolean(false), FfxFloat16x2(bL, cL), FfxFloat16x2(eL, fL), FfxFloat16x2(fL, gL), @@ -537,8 +539,8 @@ void FsrEasuH( dirPY, lenP, ppp, - FfxUInt32(false), - FfxUInt32(true), + FfxBoolean(false), + FfxBoolean(true), FfxFloat16x2(fL, gL), FfxFloat16x2(iL, jL), FfxFloat16x2(jL, kL), @@ -549,7 +551,7 @@ void FsrEasuH( FfxFloat16x2 dir2 = dir * dir; FfxFloat16 dirR = dir2.x + dir2.y; - FfxBoolean zro = FfxBoolean(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); + FfxUInt32 zro = FfxUInt32(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0)); dirR = ffxApproximateReciprocalSquareRootHalf(dirR); dirR = (zro > 0) ? FFX_BROADCAST_FLOAT16(1.0) : dirR; dir.x = (zro > 0) ? 
FFX_BROADCAST_FLOAT16(1.0) : dir.x; @@ -664,7 +666,7 @@ void FsrEasuH( sharpness = exp2(-sharpness); FfxFloat32x2 hSharp = {sharpness, sharpness}; con[0] = ffxAsUInt32(sharpness); - con[1] = packHalf2x16(hSharp); + con[1] = ffxPackHalf2x16(hSharp); con[2] = 0; con[3] = 0; } @@ -746,12 +748,12 @@ void FsrEasuH( // Immediate constants for peak range. FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); // Limiters, these need to be high precision RCPs. - FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R); - FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G); - FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B); - FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y); - FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y); - FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y); + FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R); + FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G); + FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B); + FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y); + FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y); + FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y); FfxFloat32 lobeR = max(-hitMinR, hitMaxR); FfxFloat32 lobeG = max(-hitMinG, hitMaxG); FfxFloat32 lobeB = max(-hitMinB, hitMaxB); @@ -1050,10 +1052,10 @@ void FsrEasuH( #if defined(FFX_GPU) void FsrSrtmF(inout FfxFloat32x3 c) { - c *= ffxBroadcast3(rcp(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); + c *= ffxBroadcast3(ffxReciprocal(ffxMax3(c.r, c.g, c.b) + FfxFloat32(1.0))); } // The extra max solves the c=1.0 case (which is a /0). 
- void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(rcp(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} + void FsrSrtmInvF(inout FfxFloat32x3 c){c*=ffxBroadcast3(ffxReciprocal(max(FfxFloat32(1.0/32768.0),FfxFloat32(1.0)-ffxMax3(c.r,c.g,c.b))));} #endif //============================================================================================================================== #if defined(FFX_GPU )&& FFX_HALF == 1 diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_glsl.h new file mode 100644 index 000000000000..1fd8ed7df764 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_glsl.h @@ -0,0 +1,221 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#include "ffx_fsr1_resources.h"
+
+#if defined(FFX_GPU)
+#include "../ffx_core.h"
+
+#ifndef FFX_PREFER_WAVE64
+#define FFX_PREFER_WAVE64
+#endif // #ifndef FFX_PREFER_WAVE64
+
+// Constant buffer shared by the FSR1 passes: four constant vectors plus one sample vector.
+#if defined(FSR1_BIND_CB_FSR1)
+    layout (set = 0, binding = FSR1_BIND_CB_FSR1, std140) uniform cbFSR1_t
+    {
+        FfxUInt32x4 const0;
+        FfxUInt32x4 const1;
+        FfxUInt32x4 const2;
+        FfxUInt32x4 const3;
+        FfxUInt32x4 sample0;
+    } cbFSR1;
+#endif
+
+// NOTE(review): the accessors below read cbFSR1 but sit outside the
+// FSR1_BIND_CB_FSR1 guard above, so they presumably require that macro to be
+// defined whenever this header is compiled - confirm against the build setup.
+
+// Returns cbFSR1.const0.
+FfxUInt32x4 Const0()
+{
+    return cbFSR1.const0;
+}
+
+// Returns cbFSR1.const1.
+FfxUInt32x4 Const1()
+{
+    return cbFSR1.const1;
+}
+
+// Returns cbFSR1.const2.
+FfxUInt32x4 Const2()
+{
+    return cbFSR1.const2;
+}
+
+// Returns cbFSR1.const3.
+FfxUInt32x4 Const3()
+{
+    return cbFSR1.const3;
+}
+
+// Returns cbFSR1.sample0 (same member as RCasSample).
+FfxUInt32x4 EASUSample()
+{
+    return cbFSR1.sample0;
+}
+
+// Returns cbFSR1.sample0 (same member as EASUSample).
+FfxUInt32x4 RCasSample()
+{
+    return cbFSR1.sample0;
+}
+
+// Returns cbFSR1.const0 (same member as Const0).
+FfxUInt32x4 RCasConfig()
+{
+    return cbFSR1.const0;
+}
+
+// GODOT BEGINS
+// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them.
+layout (set = 0, binding = 100 /*1000*/) uniform sampler s_LinearClamp;
+// GODOT ENDS
+
+// SRVs
+#if defined FSR1_BIND_SRV_INPUT_COLOR
+    layout (set = 0, binding = FSR1_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color;
+#endif
+#if defined FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR
+    layout (set = 0, binding = FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR) uniform texture2D r_internal_upscaled_color;
+#endif
+#if defined FSR1_BIND_SRV_UPSCALED_OUTPUT
+    layout (set = 0, binding = FSR1_BIND_SRV_UPSCALED_OUTPUT) uniform texture2D r_upscaled_output;
+#endif
+
+// UAV declarations
+#if defined FSR1_BIND_UAV_INPUT_COLOR
+    layout (set = 0, binding = FSR1_BIND_UAV_INPUT_COLOR, rgba32f) uniform image2D rw_input_color;
+#endif
+#if defined FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR
+    layout (set = 0, binding = FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR, rgba32f) uniform image2D rw_internal_upscaled_color;
+#endif
+#if defined FSR1_BIND_UAV_UPSCALED_OUTPUT
+    layout (set = 0, binding = FSR1_BIND_UAV_UPSCALED_OUTPUT, rgba32f) uniform image2D rw_upscaled_output;
+#endif
+
+// Half-precision variants of the callbacks; the full-precision ones follow after the #else.
+#if FFX_HALF
+
+// Gathers component 0 (red) of the four texels around fPxPos from the input colour.
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat16x4 GatherEasuRed(FfxFloat32x2 fPxPos)
+    {
+        return FfxFloat16x4(textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+// Gathers component 1 (green) of the four texels around fPxPos from the input colour.
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat16x4 GatherEasuGreen(FfxFloat32x2 fPxPos)
+    {
+        return FfxFloat16x4(textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 1));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+// Gathers component 2 (blue) of the four texels around fPxPos from the input colour.
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat16x4 GatherEasuBlue(FfxFloat32x2 fPxPos)
+    {
+        return FfxFloat16x4(textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 2));
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+// Stores the EASU result with alpha 1: into the internal upscaled colour when
+// FFX_FSR1_OPTION_APPLY_RCAS is set, otherwise directly into the upscaled output.
+#if FFX_FSR1_OPTION_APPLY_RCAS
+    #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor)
+    {
+        imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+    }
+    #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else
+    #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor)
+    {
+        imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+    }
+    #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+// Fetches one texel (mip 0) of the internal upscaled colour as RCAS input.
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+    FfxFloat16x4 LoadRCas_Input(FfxInt16x2 iPxPos)
+    {
+        return FfxFloat16x4(texelFetch(r_internal_upscaled_color, FfxInt32x2(iPxPos), 0));
+    }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+// Stores two RCAS results at once: the .x components go to iPxPos and the
+// .y components to the pixel 8 columns to the right.
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreRCasOutputHx2(FfxInt16x2 iPxPos, FfxFloat16x2 fColorR, FfxFloat16x2 fColorG, FfxFloat16x2 fColorB, FfxFloat16x2 fColorA)
+    {
+        imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColorR.x, fColorG.x, fColorB.x, fColorA.x));
+        iPxPos.x += FfxInt16(8);
+        imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColorR.y, fColorG.y, fColorB.y, fColorA.y));
+    }
+
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#else // FFX_HALF
+
+// Gathers component 0 (red) of the four texels around fPxPos from the input colour.
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat32x4 GatherEasuRed(FfxFloat32x2 fPxPos)
+    {
+        return textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 0);
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+// Gathers component 1 (green) of the four texels around fPxPos from the input colour.
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat32x4 GatherEasuGreen(FfxFloat32x2 fPxPos)
+    {
+        return textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 1);
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+// Gathers component 2 (blue) of the four texels around fPxPos from the input colour.
+#if defined(FSR1_BIND_SRV_INPUT_COLOR)
+    FfxFloat32x4 GatherEasuBlue(FfxFloat32x2 fPxPos)
+    {
+        return textureGather(sampler2D(r_input_color, s_LinearClamp), fPxPos, 2);
+    }
+#endif // defined(FSR1_BIND_SRV_INPUT_COLOR)
+
+// Stores the EASU result with alpha 1: into the internal upscaled colour when
+// FFX_FSR1_OPTION_APPLY_RCAS is set, otherwise directly into the upscaled output.
+#if FFX_FSR1_OPTION_APPLY_RCAS
+    #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+    {
+        imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+    }
+    #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR)
+#else
+    #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+    {
+        imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+    }
+    #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+#endif // #if FFX_FSR1_OPTION_APPLY_RCAS
+
+// Fetches one texel (mip 0) of the internal upscaled colour as RCAS input.
+#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+    FfxFloat32x4 LoadRCas_Input(FfxInt32x2 iPxPos)
+    {
+        return texelFetch(r_internal_upscaled_color, iPxPos, 0);
+    }
+#endif // defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR)
+
+// Stores an RCAS result into the upscaled output; the three-component
+// overload forwards with alpha 1.
+#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+    void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x4 fColor)
+    {
+        imageStore(rw_upscaled_output, iPxPos, fColor);
+    }
+
+    void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
+    {
+        StoreRCasOutput(iPxPos, FfxFloat32x4(fColor, 1.f));
+    }
+#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT)
+
+#endif // FFX_HALF
+
+#endif // #if
defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_hlsl.h new file mode 100644 index 000000000000..03e609337fcd --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_callbacks_hlsl.h @@ -0,0 +1,257 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_fsr1_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 + +#pragma warning(disable: 3205) // conversion from larger type to smaller + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_FSR1_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_FSR1_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_FSR1_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +#if defined(FSR1_BIND_CB_FSR1) + cbuffer cbFSR1 : FFX_FSR1_DECLARE_CB(FSR1_BIND_CB_FSR1) + { + FfxUInt32x4 const0; + FfxUInt32x4 const1; + FfxUInt32x4 const2; + FfxUInt32x4 const3; + FfxUInt32x4 sample; + #define FFX_FSR1_CONSTANT_BUFFER_1_SIZE 20 // Number of 32-bit values. This must be kept in sync with the cbFSR1 size. 
+ }; +#else + #define const0 0 + #define const1 0 + #define const2 0 + #define const3 0 + #define sample 0 +#endif + +#if defined(FFX_GPU) +#define FFX_FSR1_ROOTSIG_STRINGIFY(p) FFX_FSR1_ROOTSIG_STR(p) +#define FFX_FSR1_ROOTSIG_STR(p) #p +#define FFX_FSR1_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR1_ROOTSIG_STRINGIFY(FFX_FSR1_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR1_ROOTSIG_STRINGIFY(FFX_FSR1_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#if defined(FFX_FSR1_EMBED_ROOTSIG) +#define FFX_FSR1_EMBED_ROOTSIG_CONTENT FFX_FSR1_ROOTSIG +#else +#define FFX_FSR1_EMBED_ROOTSIG_CONTENT +#endif // #if FFX_FSR1_EMBED_ROOTSIG +#endif // #if defined(FFX_GPU) + + +FfxUInt32x4 Const0() +{ + return const0; +} + +FfxUInt32x4 Const1() +{ + return const1; +} + +FfxUInt32x4 Const2() +{ + return const2; +} + +FfxUInt32x4 Const3() +{ + return const3; +} + +FfxUInt32x4 EASUSample() +{ + return sample; +} + +FfxUInt32x4 RCasSample() +{ + return sample; +} + +FfxUInt32x4 RCasConfig() +{ + return const0; +} + +SamplerState s_LinearClamp : register(s0); + + // SRVs + #if defined FSR1_BIND_SRV_INPUT_COLOR + Texture2D r_input_color : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_INPUT_COLOR); + #endif + #if defined FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR + Texture2D r_internal_upscaled_color : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR); + #endif + #if defined FSR1_BIND_SRV_UPSCALED_OUTPUT + Texture2D r_upscaled_output : FFX_FSR1_DECLARE_SRV(FSR1_BIND_SRV_UPSCALED_OUTPUT); + #endif + + // UAV declarations + #if defined FSR1_BIND_UAV_INPUT_COLOR + RWTexture2D rw_input_color : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_INPUT_COLOR); 
+ #endif + #if defined FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR + RWTexture2D rw_internal_upscaled_color : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR); + #endif + #if defined FSR1_BIND_UAV_UPSCALED_OUTPUT + RWTexture2D rw_upscaled_output : FFX_FSR1_DECLARE_UAV(FSR1_BIND_UAV_UPSCALED_OUTPUT); + #endif + +#if FFX_HALF + +#if defined(FSR1_BIND_SRV_INPUT_COLOR) + FfxFloat16x4 GatherEasuRed(FfxFloat32x2 fPxPos) + { + return (FfxFloat16x4)r_input_color.GatherRed(s_LinearClamp, fPxPos, FfxInt32x2(0,0)); + } +#endif // defined(FSR1_BIND_SRV_INPUT_COLOR) + +#if defined(FSR1_BIND_SRV_INPUT_COLOR) + FfxFloat16x4 GatherEasuGreen(FfxFloat32x2 fPxPos) + { + return (FfxFloat16x4)r_input_color.GatherGreen(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); + } +#endif // defined(FSR1_BIND_SRV_INPUT_COLOR) + +#if defined(FSR1_BIND_SRV_INPUT_COLOR) + FfxFloat16x4 GatherEasuBlue(FfxFloat32x2 fPxPos) + { + return (FfxFloat16x4)r_input_color.GatherBlue(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); + } +#endif // defined(FSR1_BIND_SRV_INPUT_COLOR) + +#if FFX_FSR1_OPTION_APPLY_RCAS + #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR) + void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor) + { + rw_internal_upscaled_color[iPxPos] = FfxFloat32x4(fColor, 1.f); + } + #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR) +#else + #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) + void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat16x3 fColor) + { + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); + } + #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) +#endif // #if FFX_FSR1_OPTION_APPLY_RCAS + +#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR) + FfxFloat16x4 LoadRCas_Input(FfxInt16x2 iPxPos) + { + return (FfxFloat16x4)r_internal_upscaled_color[iPxPos]; + } +#endif // defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR) + +#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) + void StoreRCasOutputHx2(FfxInt16x2 iPxPos, FfxFloat16x2 fColorR, FfxFloat16x2 fColorG, FfxFloat16x2 fColorB, 
FfxFloat16x2 fColorA) + { + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColorR.x, fColorG.x, fColorB.x, fColorA.x); + iPxPos.x += 8; + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColorR.y, fColorG.y, fColorB.y, fColorA.y); + } +#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) + +#else // FFX_HALF + +#if defined(FSR1_BIND_SRV_INPUT_COLOR) + FfxFloat32x4 GatherEasuRed(FfxFloat32x2 fPxPos) + { + return r_input_color.GatherRed(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); + } +#endif // defined(FSR1_BIND_SRV_INPUT_COLOR) + +#if defined(FSR1_BIND_SRV_INPUT_COLOR) + FfxFloat32x4 GatherEasuGreen(FfxFloat32x2 fPxPos) + { + return r_input_color.GatherGreen(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); + } +#endif // defined(FSR1_BIND_SRV_INPUT_COLOR) + +#if defined(FSR1_BIND_SRV_INPUT_COLOR) + FfxFloat32x4 GatherEasuBlue(FfxFloat32x2 fPxPos) + { + return r_input_color.GatherBlue(s_LinearClamp, fPxPos, FfxInt32x2(0, 0)); + } +#endif // defined(FSR1_BIND_SRV_INPUT_COLOR) + + +#if FFX_FSR1_OPTION_APPLY_RCAS + #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR) + void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) + { + rw_internal_upscaled_color[iPxPos] = FfxFloat32x4(fColor, 1.f); + } + #endif // #if defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR) +#else + #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) + void StoreEASUOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) + { + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); + } + #endif // #if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) +#endif // #if FFX_FSR1_OPTION_APPLY_RCAS + +#if defined(FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR) + FfxFloat32x4 LoadRCas_Input(FfxInt32x2 iPxPos) + { + return r_internal_upscaled_color[iPxPos]; + } +#endif // defined(FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR) + +#if defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) + void StoreRCasOutput(FfxInt32x2 iPxPos, FfxFloat32x4 fColor) + { + rw_upscaled_output[iPxPos] = fColor; + } +#endif // defined(FSR1_BIND_UAV_UPSCALED_OUTPUT) + +#endif // FFX_HALF + +#endif // #if 
defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_easu.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_easu.h new file mode 100644 index 000000000000..0aebee2ce8a7 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_easu.h @@ -0,0 +1,98 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "../ffx_core.h" + +#if FFX_HALF + + #define FFX_FSR_EASU_HALF 1 + FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p) { return GatherEasuRed(p); } + FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p) { return GatherEasuGreen(p); } + FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p) { return GatherEasuBlue(p); } + +#else + + #define FFX_FSR_EASU_FLOAT 1 + FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p) { return GatherEasuRed(p); } + FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p) { return GatherEasuGreen(p); } + FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p) { return GatherEasuBlue(p); } + +#endif // FFX_HALF + +#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + #define FSR_RCAS_PASSTHROUGH_ALPHA +#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + +#include "ffx_fsr1.h" + +void CurrFilter(FfxUInt32x2 pos) +{ +#if FFX_HALF + + FfxFloat16x3 c; + FsrEasuH(c, pos, Const0(), Const1(), Const2(), Const3()); + if (EASUSample().x == 1) + { + c *= c; + } + +#if FFX_FSR1_OPTION_SRGB_CONVERSIONS + // Apply gamma if this is an sRGB format (auto-degamma'd on sampler read) + c = pow(c, FfxFloat16x3(1.0 / 2.2, 1.0 / 2.2, 1.0 / 2.2)); +#endif // FFX_FSR1_OPTION_SRGB_CONVERSIONS + + StoreEASUOutput(pos, c); + +#else + + FfxFloat32x3 c; + ffxFsrEasuFloat(c, pos, Const0(), Const1(), Const2(), Const3()); + if (EASUSample().x == 1) + { + c *= c; + } + +#if FFX_FSR1_OPTION_SRGB_CONVERSIONS + // Apply gamma if this is an sRGB format (auto-degamma'd on sampler read) + c = pow(c, FfxFloat32x3(1.f / 2.2f, 1.f / 2.2f, 1.f / 2.2f)); +#endif // FFX_FSR1_OPTION_SRGB_CONVERSIONS + + StoreEASUOutput(pos, c); + +#endif // FFX_HALF +} + +void EASU(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. 
+ FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); + CurrFilter(gxy); + gxy.x += 8u; + CurrFilter(gxy); + gxy.y += 8u; + CurrFilter(gxy); + gxy.x -= 8u; + CurrFilter(gxy); +} diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_rcas.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_rcas.h new file mode 100644 index 000000000000..60b3d3cafc3d --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_rcas.h @@ -0,0 +1,116 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "../ffx_core.h" + +#if FFX_HALF + #define FSR_RCAS_HX2 1 + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) + { + return LoadRCas_Input(p); + } + void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b) {} +#else + #define FSR_RCAS_F 1 + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) + { + return LoadRCas_Input(p); + } + void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} +#endif // FFX_HALF + +#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + #define FSR_RCAS_PASSTHROUGH_ALPHA +#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + +#include "ffx_fsr1.h" + +void CurrFilter(FFX_MIN16_U2 pos) +{ +#if FFX_HALF + +#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + FfxFloat16x2 cr, cg, cb, ca; + FsrRcasHx2(cr, cg, cb, ca, pos, RCasConfig()); +#else + FfxFloat16x2 cr, cg, cb; + FsrRcasHx2(cr, cg, cb, pos, RCasConfig()); +#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + + if (RCasSample().x == 1) + { + cr *= cr; + cg *= cg; + cb *= cb; + } + +#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + StoreRCasOutputHx2(FfxInt16x2(pos), cr, cg, cb, ca); +#else + StoreRCasOutputHx2(FfxInt16x2(pos), cr, cg, cb, FfxFloat16x2(1.0, 1.0)); +#endif + +#else + +#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + FfxFloat32x4 c; + FsrRcasF(c.r, c.g, c.b, c.a, pos, RCasConfig()); +#else + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCasConfig()); +#endif // FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + if (RCasSample().x == 1) + { + c *= c; + } + +#if FFX_FSR1_OPTION_RCAS_PASSTHROUGH_ALPHA + StoreRCasOutput(FfxInt32x2(pos), c); +#else + StoreRCasOutput(FfxInt32x2(pos), FfxFloat32x4(c, 1.0)); +#endif + +#endif +} + +void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. 
+ FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); +#if FFX_HALF + // packed version process left and right 8x8 tile, in total 16x8 region + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +#else + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x -= 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +#endif +} diff --git a/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_resources.h b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_resources.h new file mode 100644 index 000000000000..eb74d7433d75 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr1/ffx_fsr1_resources.h @@ -0,0 +1,38 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_FSR1_RESOURCES_H +#define FFX_FSR1_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) +#define FFX_FSR1_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_FSR1_RESOURCE_IDENTIFIER_INPUT_COLOR 1 +#define FFX_FSR1_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 2 +#define FFX_FSR1_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 3 + +#define FFX_FSR1_RESOURCE_IDENTIFIER_COUNT 4 + +#define FFX_FSR1_CONSTANTBUFFER_IDENTIFIER_FSR1 0 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR1_RESOURCES_H ) diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_accumulate.h similarity index 95% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_accumulate.h index 7bd5892cb90f..71b9d26c10c4 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_accumulate.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// @@ -35,7 +36,7 @@ FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight) { - // Aviod invalid values when accumulation and upsampled weight is 0 + // Avoid invalid values when accumulation and upsampled weight is 0 fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www); #if FFX_FSR2_OPTION_HDR_COLOR_INPUT @@ -65,8 +66,8 @@ void RectifyHistory( { FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f)); - const FfxFloat32 fVecolityFactor = ffxSaturate(params.fHrVelocity / 20.0f); - const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVecolityFactor)); + const FfxFloat32 fHrVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f); + const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fHrVelocityFactor)); FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT); FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale; @@ -157,8 +158,7 @@ FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams param FfxFloat32 fMin = abs(fDiffs0); - if (fMin >= fUnormThreshold) - { + if (fMin >= fUnormThreshold) { for (int i = N_MINUS_2; i <= N_MINUS_4; i++) { FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]); @@ -170,7 +170,7 @@ FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams param } } - const FfxFloat32 fBoxSize = clippingBox.boxVec.x; + const FfxFloat32 fBoxSize = clippingBox.boxVec.x; const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f); fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor; diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h 
b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_glsl.h similarity index 66% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_glsl.h index b610037cc673..1b2e11307ed1 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_glsl.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -18,20 +19,19 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
+ #include "ffx_fsr2_resources.h" #if defined(FFX_GPU) -#include "ffx_core.h" -#endif // #if defined(FFX_GPU) +#include "../ffx_core.h" -#if defined(FFX_GPU) -#ifndef FFX_FSR2_PREFER_WAVE64 -#define FFX_FSR2_PREFER_WAVE64 -#endif // #if defined(FFX_GPU) +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 #if defined(FSR2_BIND_CB_FSR2) - layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t - { + layout (set = 0, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t + { FfxInt32x2 iRenderSize; FfxInt32x2 iMaxRenderSize; FfxInt32x2 iDisplaySize; @@ -53,247 +53,359 @@ FfxFloat32 fDynamicResChangeFactor; FfxFloat32 fViewSpaceToMetersFactor; + // GODOT BEGINS FfxFloat32 fPad; mat4 mReprojectionMatrix; + // GODOT ENDS } cbFSR2; -#endif + FfxInt32x2 RenderSize() { - return cbFSR2.iRenderSize; + return cbFSR2.iRenderSize; } FfxInt32x2 MaxRenderSize() { - return cbFSR2.iMaxRenderSize; + return cbFSR2.iMaxRenderSize; } FfxInt32x2 DisplaySize() { - return cbFSR2.iDisplaySize; + return cbFSR2.iDisplaySize; } FfxInt32x2 InputColorResourceDimensions() { - return cbFSR2.iInputColorResourceDimensions; + return cbFSR2.iInputColorResourceDimensions; } FfxInt32x2 LumaMipDimensions() { - return cbFSR2.iLumaMipDimensions; + return cbFSR2.iLumaMipDimensions; } FfxInt32 LumaMipLevelToUse() { - return cbFSR2.iLumaMipLevelToUse; + return cbFSR2.iLumaMipLevelToUse; } FfxInt32 FrameIndex() { - return cbFSR2.iFrameIndex; + return cbFSR2.iFrameIndex; } FfxFloat32x4 DeviceToViewSpaceTransformFactors() { - return cbFSR2.fDeviceToViewDepth; + return cbFSR2.fDeviceToViewDepth; } FfxFloat32x2 Jitter() { - return cbFSR2.fJitter; + return cbFSR2.fJitter; } FfxFloat32x2 MotionVectorScale() { - return cbFSR2.fMotionVectorScale; + return cbFSR2.fMotionVectorScale; } FfxFloat32x2 DownscaleFactor() { - return cbFSR2.fDownscaleFactor; + return cbFSR2.fDownscaleFactor; } FfxFloat32x2 MotionVectorJitterCancellation() { - return 
cbFSR2.fMotionVectorJitterCancellation; + return cbFSR2.fMotionVectorJitterCancellation; } FfxFloat32 PreExposure() { - return cbFSR2.fPreExposure; + return cbFSR2.fPreExposure; } FfxFloat32 PreviousFramePreExposure() { - return cbFSR2.fPreviousFramePreExposure; + return cbFSR2.fPreviousFramePreExposure; } FfxFloat32 TanHalfFoV() { - return cbFSR2.fTanHalfFOV; + return cbFSR2.fTanHalfFOV; } FfxFloat32 JitterSequenceLength() { - return cbFSR2.fJitterSequenceLength; + return cbFSR2.fJitterSequenceLength; } FfxFloat32 DeltaTime() { - return cbFSR2.fDeltaTime; + return cbFSR2.fDeltaTime; } FfxFloat32 DynamicResChangeFactor() { - return cbFSR2.fDynamicResChangeFactor; + return cbFSR2.fDynamicResChangeFactor; } FfxFloat32 ViewSpaceToMetersFactor() { - return cbFSR2.fViewSpaceToMetersFactor; + return cbFSR2.fViewSpaceToMetersFactor; +} + +#endif // #if defined(FSR2_BIND_CB_FSR2) + + +#ifdef FSR2_BIND_CB_AUTOREACTIVE +layout(set = 0, binding = FSR2_BIND_CB_AUTOREACTIVE, std140) uniform cbGenerateReactive_t +{ + FfxFloat32 fTcThreshold; + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +} cbGenerateReactive; + +FfxFloat32 TcThreshold() +{ + return cbGenerateReactive.fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return cbGenerateReactive.fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return cbGenerateReactive.fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return cbGenerateReactive.fReactiveMax; +} +#endif // #ifdef FSR2_BIND_CB_AUTOREACTIVE + +#if defined(FSR2_BIND_CB_RCAS) +layout(set = 0, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t +{ + FfxUInt32x4 rcasConfig; +} cbRCAS; + +FfxUInt32x4 RCASConfig() +{ + return cbRCAS.rcasConfig; +} +#endif // #if defined(FSR2_BIND_CB_RCAS) + + +#if defined(FSR2_BIND_CB_REACTIVE) +layout(set = 0, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t +{ + FfxFloat32 scale; + FfxFloat32 threshold; + FfxFloat32 binaryValue; + FfxUInt32 flags; +} cbGenerateReactive; + +FfxFloat32 
GenReactiveScale() +{ + return cbGenerateReactive.scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return cbGenerateReactive.threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return cbGenerateReactive.binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return cbGenerateReactive.flags; +} +#endif // #if defined(FSR2_BIND_CB_REACTIVE) + + +#if defined(FSR2_BIND_CB_SPD) +layout(set = 0, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t +{ + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +} cbSPD; + +FfxUInt32 MipCount() +{ + return cbSPD.mips; } -layout (set = 0, binding = 0) uniform sampler s_PointClamp; -layout (set = 0, binding = 1) uniform sampler s_LinearClamp; +FfxUInt32 NumWorkGroups() +{ + return cbSPD.numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return cbSPD.workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return cbSPD.renderSize; +} +#endif // #if defined(FSR2_BIND_CB_SPD) + +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+layout (set = 0, binding = 100 /*1000*/) uniform sampler s_PointClamp; +layout (set = 0, binding = 101 /*1001*/) uniform sampler s_LinearClamp; +// GODOT ENDS // SRVs #if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) - layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only; + layout (set = 0, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only; #endif #if defined(FSR2_BIND_SRV_INPUT_COLOR) - layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; + layout (set = 0, binding = FSR2_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; #endif #if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) - layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; + layout (set = 0, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; #endif #if defined(FSR2_BIND_SRV_INPUT_DEPTH) - layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; + layout (set = 0, binding = FSR2_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; #endif #if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) - layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure; + layout (set = 0, binding = FSR2_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure; #endif #if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) - layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure; + layout(set = 0, binding = FSR2_BIND_SRV_AUTO_EXPOSURE) uniform texture2D r_auto_exposure; #endif #if defined(FSR2_BIND_SRV_REACTIVE_MASK) - layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; + layout (set = 0, binding = FSR2_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; #endif #if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) - layout (set = 1, binding = 
FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask; + layout (set = 0, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask; #endif #if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) - layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth; + layout (set = 0, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) uniform utexture2D r_reconstructed_previous_nearest_depth; #endif #if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) - layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; + layout (set = 0, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; #endif #if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) - layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors; + layout(set = 0, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) uniform texture2D r_previous_dilated_motion_vectors; #endif #if defined(FSR2_BIND_SRV_DILATED_DEPTH) - layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth; + layout (set = 0, binding = FSR2_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilatedDepth; #endif #if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) - layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color; + layout (set = 0, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color; #endif #if defined(FSR2_BIND_SRV_LOCK_STATUS) - layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status; + layout (set = 0, binding = FSR2_BIND_SRV_LOCK_STATUS) uniform texture2D r_lock_status; #endif #if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) - layout (set = 1, binding = 
FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma; + layout (set = 0, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA) uniform texture2D r_lock_input_luma; #endif #if defined(FSR2_BIND_SRV_NEW_LOCKS) - layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks; + layout(set = 0, binding = FSR2_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks; #endif #if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) - layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color; + layout (set = 0, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR) uniform texture2D r_prepared_input_color; #endif #if defined(FSR2_BIND_SRV_LUMA_HISTORY) - layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history; + layout (set = 0, binding = FSR2_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history; #endif #if defined(FSR2_BIND_SRV_RCAS_INPUT) - layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input; + layout (set = 0, binding = FSR2_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input; #endif #if defined(FSR2_BIND_SRV_LANCZOS_LUT) - layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; + layout (set = 0, binding = FSR2_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; #endif #if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) - layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips; + layout (set = 0, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) uniform texture2D r_imgMips; #endif #if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) - layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut; + layout (set = 0, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) uniform texture2D r_upsample_maximum_bias_lut; #endif #if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) - layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D 
r_dilated_reactive_masks; -#endif + layout (set = 0, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks; +#endif #if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) - layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha; + layout(set = 0, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha; #endif #if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) - layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha; + layout(set = 0, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha; #endif // UAV #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH - layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; + layout (set = 0, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; #endif #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors; + layout (set = 0, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors; #endif #if defined FSR2_BIND_UAV_DILATED_DEPTH - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth; + layout (set = 0, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f) writeonly uniform image2D rw_dilatedDepth; #endif #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED - layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color; + layout (set = 0, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color; #endif #if defined FSR2_BIND_UAV_LOCK_STATUS - layout (set = 
1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status; + layout (set = 0, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f) uniform image2D rw_lock_status; #endif #if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) - layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma; + layout(set = 0, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f) writeonly uniform image2D rw_lock_input_luma; #endif #if defined FSR2_BIND_UAV_NEW_LOCKS - layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks; + layout(set = 0, binding = FSR2_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks; #endif #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR - layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color; + layout (set = 0, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16) writeonly uniform image2D rw_prepared_input_color; #endif #if defined FSR2_BIND_UAV_LUMA_HISTORY - layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; + layout (set = 0, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; #endif #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT - layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; + layout (set = 0, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change; + layout (set = 0, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f) coherent uniform image2D rw_img_mip_shading_change; #endif #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5; + layout (set 
= 0, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f) coherent uniform image2D rw_img_mip_5; #endif #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS - layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks; -#endif -#if defined FSR2_BIND_UAV_EXPOSURE - layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; + layout (set = 0, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8) writeonly uniform image2D rw_dilated_reactive_masks; +#endif +#if defined FSR2_BIND_UAV_EXPOSURE + layout (set = 0, binding = FSR2_BIND_UAV_EXPOSURE, rg32f) uniform image2D rw_exposure; #endif #if defined FSR2_BIND_UAV_AUTO_EXPOSURE - layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure; + layout(set = 0, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f) uniform image2D rw_auto_exposure; #endif -#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC - layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + layout (set = 0, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; #endif #if defined FSR2_BIND_UAV_AUTOREACTIVE - layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive; + layout(set = 0, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive; #endif #if defined FSR2_BIND_UAV_AUTOCOMPOSITION - layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition; + layout(set = 0, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition; #endif #if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR - layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha; + layout(set = 0, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, 
r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha; #endif #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR - layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha; + layout(set = 0, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha; #endif #if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) @@ -317,14 +429,16 @@ FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) } #endif -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) { + // GODOT BEGINS #if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f); #else - return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; + return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; #endif + // GODOT ENDS } #endif @@ -361,15 +475,17 @@ FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) { FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; + // GODOT BEGINS #if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))); if (bInvalidMotionVector) { FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos); FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize(); - fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix); + fSrcMotionVector = FFX_FSR_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix); } #endif + // GODOT ENDS FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); @@ -402,6 +518,15 @@ FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) } #endif +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ +#if defined(FSR2_BIND_SRV_RCAS_INPUT) + 
return texelFetch(r_rcas_input, iPxPos, 0); +#else + return FfxFloat32x4(0.0); +#endif +} + #if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory) { @@ -533,12 +658,12 @@ void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) #if defined(FSR2_BIND_UAV_DILATED_DEPTH) void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { - //FfxUInt32 uDepth = f32tof16(fDepth); + //FfxUInt32 uDepth = ffxF32ToF16(fDepth); imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f)); } #endif -#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f)); @@ -607,7 +732,7 @@ FfxFloat32 AutoExposure() FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) { #if defined(FSR2_BIND_SRV_LANCZOS_LUT) - return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x; + return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x; #else return 0.f; #endif @@ -642,13 +767,6 @@ void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER } #endif -#if defined(FFX_INTERNAL) -FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) -{ - return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba; -} -#endif - #if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { @@ -695,4 +813,69 @@ void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN F } #endif +FfxFloat32x2 SPD_LoadExposureBuffer() +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + return imageLoad(rw_auto_exposure, ivec2(0, 0)).xy; +#else + return FfxFloat32x2(0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + +void 
SPD_SetExposureBuffer(FfxFloat32x2 value) +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + imageStore(rw_auto_exposure, ivec2(0, 0), vec4(value, 0.0f, 0.0f)); +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + +FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) +{ +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + return FfxFloat32x4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f); +#else + return FfxFloat32x4(0.f, 0.f, 0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 +} + +void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) +{ + switch (slice) + { + case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + imageStore(rw_img_mip_shading_change, iPxPos, FfxFloat32x4(value, 0.0f, 0.0f, 0.0f)); +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + break; + case 5: +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + imageStore(rw_img_mip_5, iPxPos, FfxFloat32x4(value, 0.0f, 0.0f, 0.0f)); +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + break; + default: + + // avoid flattened side effect +#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) + imageStore(rw_img_mip_shading_change, iPxPos, FfxFloat32x4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f)); +#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) + imageStore(rw_img_mip_5, iPxPos, FfxFloat32x4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f)); +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + break; + } +} + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0, 0), 1); +#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC +} + +void SPD_ResetAtomicCounter() +{ +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + imageStore(rw_spd_global_atomic, ivec2(0, 0), uvec4(0)); +#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC +} + #endif // #if defined(FFX_GPU) diff --git 
a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_hlsl.h similarity index 64% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_hlsl.h index fd722b307e2f..8daafea000e6 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_callbacks_hlsl.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// @@ -26,20 +27,16 @@ #pragma dxc diagnostic push #pragma dxc diagnostic ignored "-Wambig-lit-shift" #endif //__hlsl_dx_compiler -#include "ffx_core.h" +#include "../ffx_core.h" #ifdef __hlsl_dx_compiler #pragma dxc diagnostic pop #endif //__hlsl_dx_compiler -#endif // #if defined(FFX_GPU) -#if defined(FFX_GPU) -#ifndef FFX_FSR2_PREFER_WAVE64 -#define FFX_FSR2_PREFER_WAVE64 -#endif // #if defined(FFX_GPU) +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 -#if defined(FFX_GPU) #pragma warning(disable: 3205) // conversion from larger type to smaller -#endif // #if defined(FFX_GPU) #define DECLARE_SRV_REGISTER(regIndex) t##regIndex #define DECLARE_UAV_REGISTER(regIndex) u##regIndex @@ -48,7 +45,7 @@ #define FFX_FSR2_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) #define FFX_FSR2_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) -#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_CB_FSR2) cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) { FfxInt32x2 iRenderSize; @@ -71,56 +68,10 @@ FfxFloat32 fDeltaTime; FfxFloat32 fDynamicResChangeFactor; FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fPadding; }; -#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4) // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. 
-#endif - -#if defined(FFX_GPU) -#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p) -#define FFX_FSR2_ROOTSIG_STR(p) #p -#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \ - "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ - "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] - -#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. 
- -#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ - "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \ - "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \ - "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ - "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ - "addressU = TEXTURE_ADDRESS_CLAMP, " \ - "addressV = TEXTURE_ADDRESS_CLAMP, " \ - "addressW = TEXTURE_ADDRESS_CLAMP, " \ - "comparisonFunc = COMPARISON_NEVER, " \ - "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] -#if defined(FFX_FSR2_EMBED_ROOTSIG) -#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG -#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG -#else -#define FFX_FSR2_EMBED_ROOTSIG_CONTENT -#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT -#endif // #if FFX_FSR2_EMBED_ROOTSIG -#endif // #if defined(FFX_GPU) +#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE 32 /* Define getter functions in the order they are defined in the CB! 
*/ FfxInt32x2 RenderSize() @@ -217,66 +168,181 @@ FfxFloat32 ViewSpaceToMetersFactor() { return fViewSpaceToMetersFactor; } +#endif // #if defined(FSR2_BIND_CB_FSR2) +#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p) +#define FFX_FSR2_ROOTSIG_STR(p) #p +#define FFX_FSR2_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. 
+ +#define FFX_FSR2_CB2_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR2_CONSTANT_BUFFER_3_SIZE 4 // Number of 32-bit values. This must be kept in sync with cbGenerateReactive size. 
+ +#define FFX_FSR2_REACTIVE_ROOTSIG [RootSignature("DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#if defined(FFX_FSR2_EMBED_ROOTSIG) +#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG +#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG +#define FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT FFX_FSR2_REACTIVE_ROOTSIG +#else +#define FFX_FSR2_EMBED_ROOTSIG_CONTENT +#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT +#define FFX_FSR2_EMBED_ROOTSIG_REACTIVE_CONTENT +#endif // #if FFX_FSR2_EMBED_ROOTSIG + +#if defined(FSR2_BIND_CB_AUTOREACTIVE) +cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_AUTOREACTIVE) +{ + FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +}; + +FfxFloat32 TcThreshold() +{ + return fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return fReactiveMax; +} +#endif // #if defined(FSR2_BIND_CB_AUTOREACTIVE) + +#if defined(FSR2_BIND_CB_RCAS) +cbuffer cbRCAS : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_RCAS) +{ + FfxUInt32x4 rcasConfig; 
+}; + +FfxUInt32x4 RCASConfig() +{ + return rcasConfig; +} +#endif // #if defined(FSR2_BIND_CB_RCAS) + + +#if defined(FSR2_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_REACTIVE) +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +}; + +FfxFloat32 GenReactiveScale() +{ + return gen_reactive_scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return gen_reactive_flags; +} +#endif // #if defined(FSR2_BIND_CB_REACTIVE) + +#if defined(FSR2_BIND_CB_SPD) +cbuffer cbSPD : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_SPD) { + + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +}; + +FfxUInt32 MipCount() +{ + return mips; +} + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return renderSize; +} +#endif // #if defined(FSR2_BIND_CB_SPD) SamplerState s_PointClamp : register(s0); SamplerState s_LinearClamp : register(s1); -// SRVs -#if defined(FFX_INTERNAL) - Texture2D r_input_opaque_only : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY); - Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR); - Texture2D r_input_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS); - Texture2D r_input_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH); - Texture2D r_input_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE); - Texture2D r_auto_exposure : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); - Texture2D r_reactive_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK); - 
Texture2D r_transparency_and_composition_mask : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK); - Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); - Texture2D r_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); - Texture2D r_previous_dilated_motion_vectors : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS); - Texture2D r_dilatedDepth : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); - Texture2D r_internal_upscaled_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - Texture2D r_lock_status : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - Texture2D r_lock_input_luma : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA); - Texture2D r_new_locks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS); - Texture2D r_prepared_input_color : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - Texture2D r_luma_history : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); - Texture2D r_rcas_input : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT); - Texture2D r_lanczos_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT); - Texture2D r_imgMips : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE); - Texture2D r_upsample_maximum_bias_lut : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT); - Texture2D r_dilated_reactive_masks : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - Texture2D r_input_prev_color_pre_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); - Texture2D r_input_prev_color_post_alpha : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); - - Texture2D r_debug_out : 
FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); - - // UAV declarations - RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH); - RWTexture2D rw_dilated_motion_vectors : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS); - RWTexture2D rw_dilatedDepth : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH); - RWTexture2D rw_internal_upscaled_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR); - RWTexture2D rw_lock_status : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS); - RWTexture2D rw_lock_input_luma : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA); - RWTexture2D rw_new_locks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS); - RWTexture2D rw_prepared_input_color : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR); - RWTexture2D rw_luma_history : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY); - RWTexture2D rw_upscaled_output : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT); - - globallycoherent RWTexture2D rw_img_mip_shading_change : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE); - globallycoherent RWTexture2D rw_img_mip_5 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5); - RWTexture2D rw_dilated_reactive_masks : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS); - RWTexture2D rw_auto_exposure : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE); - globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT); - RWTexture2D rw_debug_out : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT); - - RWTexture2D rw_output_autoreactive : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE); - RWTexture2D 
rw_output_autocomposition : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION); - RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); - RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); - -#else // #if defined(FFX_INTERNAL) + // SRVs #if defined FSR2_BIND_SRV_INPUT_COLOR Texture2D r_input_color_jittered : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR); #endif @@ -416,72 +482,79 @@ SamplerState s_LinearClamp : register(s1); #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR); #endif -#endif // #if defined(FFX_INTERNAL) -#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel) { return r_imgMips.mips[mipLevel][iPxPos]; } #endif -#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel) { return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); } #endif -#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) { return r_input_depth[iPxPos]; } #endif -#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INPUT_DEPTH) FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) { return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x; } #endif -#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_REACTIVE_MASK) FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) { return r_reactive_mask[iPxPos]; } #endif -#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL) +#if 
defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) { return r_transparency_and_composition_mask[iPxPos]; } #endif -#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INPUT_COLOR) FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) { return r_input_color_jittered[iPxPos].rgb; } #endif -#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INPUT_COLOR) FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) { return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; } #endif -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos) { return r_prepared_input_color[iPxPos].xyz; } + +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F3 LoadPreparedInputColorHalf(FfxUInt32x2 iPxPos) +{ + return FFX_MIN16_F3(r_prepared_input_color[iPxPos].xyz); +} #endif -#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL) +#endif + +#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) { FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; @@ -496,49 +569,51 @@ FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) } #endif -#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) { return r_internal_upscaled_color[iPxHistory]; } #endif -#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_LUMA_HISTORY) void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) { rw_luma_history[iPxPos] = fLumaHistory; } #endif -#if 
defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_LUMA_HISTORY) FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) { return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); } #endif -#if defined(FFX_INTERNAL) -FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) { - return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w; -} +#if defined(FSR2_BIND_SRV_RCAS_INPUT) + return r_rcas_input[iPxPos]; +#else + return 0.0; #endif +} -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) { rw_internal_upscaled_color[iPxHistory] = fHistory; } #endif -#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) { rw_internal_upscaled_color[iPxPos] = fColorAndWeight; } #endif -#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) { rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); @@ -547,70 +622,70 @@ void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) //LOCK_LIFETIME_REMAINING == 0 //Should make LockInitialLifetime() return a const 1.0f later -#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_LOCK_STATUS) FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos) { return r_lock_status[iPxPos]; } #endif -#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_LOCK_STATUS) void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus) { rw_lock_status[iPxPos] = fLockStatus; } #endif -#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) FfxFloat32 
LoadLockInputLuma(FfxUInt32x2 iPxPos) { return r_lock_input_luma[iPxPos]; } #endif -#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) { rw_lock_input_luma[iPxPos] = fLuma; } #endif -#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_NEW_LOCKS) FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) { return r_new_locks[iPxPos]; } #endif -#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) { return rw_new_locks[iPxPos]; } #endif -#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_NEW_LOCKS) void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) { rw_new_locks[iPxPos] = newLock; } #endif -#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped) { rw_prepared_input_color[iPxPos] = fTonemapped; } #endif -#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV) { return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; } #endif -#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_LOCK_STATUS) FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) { FfxFloat32x2 fLockStatus = r_lock_status.SampleLevel(s_LinearClamp, fUV, 0); @@ -618,14 +693,14 @@ FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV) } #endif -#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) { return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); } 
#endif -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) { FfxUInt32 uDepth = asuint(fDepth); @@ -638,35 +713,35 @@ void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) } #endif -#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) { rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; } #endif -#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_DILATED_DEPTH) void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) { rw_dilatedDepth[iPxPos] = fDepth; } #endif -#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) { rw_dilated_motion_vectors[iPxPos] = fMotionVector; } #endif -#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) { return r_dilated_motion_vectors[iPxInput].xy; } #endif -#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput) { return r_previous_dilated_motion_vectors[iPxInput].xy; @@ -678,14 +753,14 @@ FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv) } #endif -#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_DILATED_DEPTH) FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) { return 
r_dilatedDepth[iPxInput]; } #endif -#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) FfxFloat32 Exposure() { FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; @@ -698,7 +773,7 @@ FfxFloat32 Exposure() } #endif -#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) FfxFloat32 AutoExposure() { FfxFloat32 exposure = r_auto_exposure[FfxUInt32x2(0, 0)].x; @@ -713,14 +788,35 @@ FfxFloat32 AutoExposure() FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) { -#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); #else return 0.f; #endif } -#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL) +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +FFX_MIN16_F SampleLanczos2Weight_NoValu(FFX_MIN16_F x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) + return FFX_MIN16_F(r_lanczos_lut.SampleLevel(s_LinearClamp, __XB_AsHalf(__XB_V_PACK_B32_F16(x, 0.5)), 0)); +#else + return 0.0; +#endif +} + +FFX_MIN16_F SampleLanczos2Weight_NoValuNoA16(FfxFloat32 x) +{ +#if defined(FSR2_BIND_SRV_LANCZOS_LUT) + return FFX_MIN16_F(r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x, 0.5), 0)); +#else + return 0.0; +#endif +} +#endif + +#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) { // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range. 
@@ -728,50 +824,50 @@ FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv) } #endif -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) { return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); } #endif -#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos) { return r_dilated_reactive_masks[iPxPos]; } #endif -#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks) { rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; } #endif -#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { return r_input_opaque_only[iPxPos].xyz; } #endif -#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { return r_input_prev_color_pre_alpha[iPxPos]; } #endif -#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) { return r_input_prev_color_post_alpha[iPxPos]; } #endif -#if defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL) -#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_AUTOREACTIVE) +#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) { 
rw_output_autoreactive[iPxPos] = fReactive.x; @@ -781,7 +877,7 @@ void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FF #endif #endif -#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) { rw_output_prev_color_pre_alpha[iPxPos] = color; @@ -789,11 +885,76 @@ void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FF } #endif -#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL) +#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) { rw_output_prev_color_post_alpha[iPxPos] = color; } #endif +FfxFloat32x2 SPD_LoadExposureBuffer() +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + return rw_auto_exposure[FfxInt32x2(0, 0)]; +#else + return FfxFloat32x2(0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + +void SPD_SetExposureBuffer(FfxFloat32x2 value) +{ +#if defined FSR2_BIND_UAV_AUTO_EXPOSURE + rw_auto_exposure[FfxInt32x2(0, 0)] = value; +#endif // #if defined FSR2_BIND_UAV_AUTO_EXPOSURE +} + +FfxFloat32x4 SPD_LoadMipmap5(FfxInt32x2 iPxPos) +{ +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + return FfxFloat32x4(rw_img_mip_5[iPxPos], 0, 0, 0); +#else + return FfxFloat32x4(0.f, 0.f, 0.f, 0.f); +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 +} + +void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 slice, FfxFloat32 value) +{ + switch (slice) + { + case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + rw_img_mip_shading_change[iPxPos] = value; +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE + break; + case 5: +#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + rw_img_mip_5[iPxPos] = value; +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + break; + default: + + // avoid flattened side effect 
+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) + rw_img_mip_shading_change[iPxPos] = rw_img_mip_shading_change[iPxPos]; +#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) + rw_img_mip_5[iPxPos] = rw_img_mip_5[iPxPos]; +#endif // #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5 + break; + } +} + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); +#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC +} + +void SPD_ResetAtomicCounter() +{ +#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC + rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0; +#endif // #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC +} + #endif // #if defined(FFX_GPU) diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_common.h similarity index 94% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_common.h index 0c72aa84943e..22d7f48ce467 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_common.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -508,6 +509,32 @@ FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure) return fRgb; } +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +void PrepareRgbPaired(inout FFX_MIN16_F2 r, inout FFX_MIN16_F2 g, inout FFX_MIN16_F2 b, FfxFloat32 fExposure, FfxFloat32 fPreExposure) +{ + FFX_MIN16_F ExposureOverPreExposureOver = FFX_MIN16_F(fExposure / fPreExposure); + + r *= ExposureOverPreExposureOver; + g *= ExposureOverPreExposureOver; + b *= ExposureOverPreExposureOver; + + r = ffxClampHalf(r, 0.0, FSR2_FP16_MAX); + g = ffxClampHalf(g, 0.0, FSR2_FP16_MAX); + b = ffxClampHalf(b, 0.0, FSR2_FP16_MAX); +} + +void UnprepareRgbPaired(inout FFX_MIN16_F2 r, inout FFX_MIN16_F2 g, inout FFX_MIN16_F2 b, FfxFloat32 fExposure) +{ + FFX_MIN16_F PreExposureOverExposure = FFX_MIN16_F(PreExposure() / fExposure); + + r *= PreExposureOverExposure; + g *= PreExposureOverExposure; + b *= PreExposureOverExposure; +} + +#endif + struct BilinearSamplingData { diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h 
b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h similarity index 84% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h index c63f1820e08d..d8d482018ef4 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// @@ -21,6 +22,21 @@ FFX_GROUPSHARED FfxUInt32 spdCounter; +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + #ifndef SPD_PACKED_ONLY FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; @@ -74,21 +90,6 @@ void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 } } -void SpdIncreaseAtomicCounter(FfxUInt32 slice) -{ - SPD_IncreaseAtomicCounter(spdCounter); -} - -FfxUInt32 SpdGetAtomicCounter() -{ - return spdCounter; -} - -void SpdResetAtomicCounter(FfxUInt32 slice) -{ - SPD_ResetAtomicCounter(); -} - FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) { return FfxFloat32x4( @@ -112,40 +113,24 @@ FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFl // define fetch and store functions Packed #if FFX_HALF -#error Callback must be implemented FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) { - return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]); + return FfxFloat16x4(0, 0, 0, 0); } + FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) { - return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]); + return FfxFloat16x4(0, 0, 0, 0); } + void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) { - if (index == LumaMipLevelToUse() || index == 5) - { - imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value); - return; - } - imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value); -} -void SpdIncreaseAtomicCounter(FfxUInt32 slice) -{ - InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter); -} -FfxUInt32 SpdGetAtomicCounter() -{ - return spdCounter; -} -void SpdResetAtomicCounter(FfxUInt32 
slice) -{ - rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0; } + FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) { return FfxFloat16x4( @@ -154,18 +139,20 @@ FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) spdIntermediateBA[x][y].x, spdIntermediateBA[x][y].y); } + void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) { spdIntermediateRG[x][y] = value.xy; spdIntermediateBA[x][y] = value.zw; } + FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) { return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); } #endif -#include "ffx_spd.h" +#include "../spd/ffx_spd.h" void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) { @@ -186,4 +173,4 @@ void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) FfxUInt32(WorkGroupId.z), FfxUInt32x2(WorkGroupOffset())); #endif -} \ No newline at end of file +} diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_depth_clip.h similarity index 97% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_depth_clip.h index fa4c975a23fd..873ff4b72d77 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_depth_clip.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -255,4 +256,4 @@ void DepthClip(FfxInt32x2 iPxPos) PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence)); } -#endif //!defined( FFX_FSR2_DEPTH_CLIPH ) \ No newline at end of file +#endif //!defined( FFX_FSR2_DEPTH_CLIPH ) diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_lock.h similarity index 93% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_lock.h index 8347fa86bcdf..61030f6bb84c 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_lock.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_postprocess_lock_status.h similarity index 95% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_postprocess_lock_status.h index cee9e148ba16..90ef34423eeb 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_postprocess_lock_status.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // diff --git a/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_rcas.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_rcas.h new file mode 100644 index 000000000000..f4c976bbec0e --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_rcas.h @@ -0,0 +1,121 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "../ffx_core.h" + +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + #define FSR_RCAS_PREFER_PAIRED_VERSION 1 +#else + #define FSR_RCAS_PREFER_PAIRED_VERSION 0 +#endif + +void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) +{ + StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); +} + +#if FSR_RCAS_PREFER_PAIRED_VERSION + + #define FSR_RCAS_HX2 1 + + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) + { + return FfxFloat16x4(LoadRCAS_Input(p)); + } + void FsrRcasInputHx2(inout FfxFloat16x2 r, inout FfxFloat16x2 g, inout FfxFloat16x2 b) + { + PrepareRgbPaired(r, g, b, Exposure(), PreExposure()); + } + +#else + + #define FSR_RCAS_F 1 + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) + { + FfxFloat32x4 fColor = LoadRCAS_Input(p); + + fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); + + return fColor; + } + void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} + +#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION + +// GODOT BEGINS +// Workaround for Godot GLSL processor not supporting conditional include. +// Thus we have to take the include statement out of the conditional block. 
+ +#include "../fsr1/ffx_fsr1.h" + +#if FSR_RCAS_PREFER_PAIRED_VERSION + +void CurrFilterPaired(FFX_MIN16_U2 pos) +{ + FfxFloat16x2 cr; + FfxFloat16x2 cg; + FfxFloat16x2 cb; + FsrRcasHx2(cr, cg, cb, pos, RCASConfig()); + + UnprepareRgbPaired(cr, cg, cb, Exposure()); + + WriteUpscaledOutput(pos, FfxFloat16x3(cr.x, cg.x, cb.x)); //TODO: fix type + pos.x += 8; + WriteUpscaledOutput(pos, FfxFloat16x3(cr.y, cg.y, cb.y)); //TODO: fix type +} + +#else + +void CurrFilter(FFX_MIN16_U2 pos) +{ + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + + c = UnprepareRgb(c, Exposure()); + + WriteUpscaledOutput(pos, c); +} + +#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION +// GODOT ENDS + +void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); +#if FSR_RCAS_PREFER_PAIRED_VERSION + CurrFilterPaired(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilterPaired(FFX_MIN16_U2(gxy)); +#else + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x -= 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +#endif +} diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h similarity index 95% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h index e9ccc4bc8c28..1a4305d772a2 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. 
// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reproject.h similarity index 95% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reproject.h index f7f396129e14..386b29771b9c 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_reproject.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_resources.h similarity index 97% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_resources.h index 535dbc383c7b..a597c5a5aed9 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_resources.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_sample.h similarity index 91% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_sample.h index f94f40aa793c..cd7142af53eb 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_sample.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -138,14 +139,6 @@ FfxFloat32 Lanczos2(FfxFloat32 x) #if FFX_HALF -#if 0 -FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) -{ - const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants - return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? 
FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); -} -#endif - FFX_MIN16_F Lanczos2(FFX_MIN16_F x) { x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); @@ -168,6 +161,26 @@ FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +FFX_MIN16_F2 PairedLanczos2ApproxSqNoClamp(FFX_MIN16_F2 x2) +{ + // Xbox ATG (Pavel): + // + // 2.0 * x2 - 5.0 25.0 25.0 - 16.0 (2.0 * x2 - 5.0)^2 - (3.0)^2 (2.0 * x2 - 8.0) * (2.0 * x2 - 2.0) (x2 - 4.0) * (x2 - 1.0) + // a = -------------- ==> ---- * a^2 - -------------- = ----------------------------- = ---------------------------------- = ----------------------- = b * (x2 - 1.0) + // 5.0 16.0 16.0 16.0 16.0 4.0 + // + // so we need to compute just (b * b) * (b * x2 - b), so we should get four packed instructions: 2 fma + 2 mul + // + + FFX_MIN16_F2 b = (0.25 * x2 - 1.0); + return (b * b) * (b * x2 - b); +} + +#endif + #endif //FFX_HALF FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) @@ -182,6 +195,15 @@ FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); return Lanczos2ApproxSqNoClamp(x2); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F2 PairedLanczos2ApproxSq(FFX_MIN16_F2 x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F2(4.0, 4.0)); + return PairedLanczos2ApproxSqNoClamp(x2); +} +#endif + #endif //FFX_HALF FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) @@ -218,6 +240,21 @@ FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) { return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); } + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + +FFX_MIN16_F 
Lanczos2_UseLUTNoAbs(FFX_MIN16_F x) +{ + return SampleLanczos2Weight_NoValu(x); +} + +FFX_MIN16_F Lanczos2_UseLUTNoAbsNoA16(FfxFloat32 x) +{ + return SampleLanczos2Weight_NoValuNoA16(x); +} + +#endif + #endif //FFX_HALF FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) @@ -363,6 +400,19 @@ FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) } #if FFX_HALF + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F4 Lanczos2ApplyWeightX(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F2 fWeight0, FFX_MIN16_F2 fWeight1, FFX_MIN16_F2 fWeight2, FFX_MIN16_F2 fWeight3, FFX_MIN16_F2 fWeightSumInverted) +{ + return (((fWeight0.x * fColor0) + fWeight1.x * fColor1) + ((fWeight2.x * fColor2) + fWeight3.x * fColor3)) * fWeightSumInverted.x; +} + +FFX_MIN16_F4 Lanczos2ApplyWeightY(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F2 fWeight0, FFX_MIN16_F2 fWeight1, FFX_MIN16_F2 fWeight2, FFX_MIN16_F2 fWeight3, FFX_MIN16_F2 fWeightSumInverted) +{ + return (((fWeight0.y * fColor0) + fWeight1.y * fColor1) + ((fWeight2.y * fColor2) + fWeight3.y * fColor3)) * fWeightSumInverted.y; +} +#endif + FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) { FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_tcr_autogen.h similarity index 97% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h rename to thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_tcr_autogen.h index 101b75d25e4d..10970eff3ffb 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_tcr_autogen.h 
@@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -52,7 +53,7 @@ FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevI FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); // cleanup very small values - retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); + retVal = (retVal < TcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); return retVal; } diff --git a/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_upsample.h b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_upsample.h new file mode 100644 index 000000000000..2281d986dbd3 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr2/ffx_fsr2_upsample.h @@ -0,0 +1,460 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_FSR2_UPSAMPLE_H +#define FFX_FSR2_UPSAMPLE_H + +FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; + +void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} +#if FFX_HALF +void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} +#endif + +#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE +#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate +#endif + +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) +{ + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} + +#if FFX_HALF +FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) +{ + FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, 
fSrcSampleOffsetBiased)); + + // To Test: Save reciproqual sqrt compute + // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} +#endif + +FfxFloat32 ComputeMaxKernelWeight() { + const FfxFloat32 fKernelSizeBias = 1.0f; + + FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); + + return ffxMin(FfxFloat32(1.99f), fKernelWeight); +} + + +#if FFX_HALF && (FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2) && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +#define FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 1 +#else +#define FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 0 +#endif + +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + +FFX_MIN16_F2 Bool2ToFloat16x2(bool x, bool y) +{ + uint lo = x ? 0x00003c00 : 0x00000000; + uint hi = y ? 
0x3c000000 : 0x00000000; + + return FFX_MIN16_F2(__XB_AsHalf(lo).x, __XB_AsHalf(hi).y); +} + +struct PairedRectificationBoxAndAccumulatedColorAndWeight +{ + FFX_MIN16_F2 aabbMinRG; + FFX_MIN16_F2 aabbMinB; + + FFX_MIN16_F2 aabbMaxRG; + FFX_MIN16_F2 aabbMaxB; + + FFX_MIN16_F2 boxCenterRG; + FFX_MIN16_F2 boxCenterB; + + FFX_MIN16_F2 boxVecRG; + FFX_MIN16_F2 boxVecB; + + FFX_MIN16_F2 fBoxCenterWeight; + + FFX_MIN16_F2 fColorRG; + FFX_MIN16_F2 fColorB; + FFX_MIN16_F2 fWeight; + + FFX_MIN16_F fKernelBiasSq; + FfxFloat32 fRectificationCurveBias; + + void setKernelBiasAndRectificationCurveBias(FfxFloat32 kernelBias, FfxFloat32 rectificationCurveBias) + { + fKernelBiasSq = FFX_MIN16_F(kernelBias * kernelBias); + fRectificationCurveBias = rectificationCurveBias; + } + + void init(FFX_MIN16_F fSrcSampleOffsetSq, bool sampleOnScreenX, bool sampleOnScreenY, FFX_MIN16_F3 colorSample) + { + // NOTE: make sure exp has 32-bit precision + const FFX_MIN16_F fBoxSampleWeight = FFX_MIN16_F( + exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq)) + ); + +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq); +#else +#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" +#endif + const FFX_MIN16_F2 fSampleWeight = FFX_MIN16_F2((sampleOnScreenX && sampleOnScreenY ? 
1.0 : 0.0), 0.0) * LanczosUpsampleWeight; + + aabbMinRG = colorSample.rg; + aabbMinB = colorSample.bb; + + aabbMaxRG = colorSample.rg; + aabbMaxB = colorSample.bb; + + boxCenterRG = colorSample.rg * fBoxSampleWeight.x; + boxCenterB = colorSample.bb * fBoxSampleWeight; + + boxVecRG = colorSample.rg * boxCenterRG; + boxVecB = colorSample.bb * boxCenterB; + + fBoxCenterWeight = fBoxSampleWeight; + + fColorRG = colorSample.rg * fSampleWeight.x; + fColorB = colorSample.bb * fSampleWeight; + fWeight = fSampleWeight; + } + + void addSample(FFX_MIN16_F2 fSrcSampleOffsetSq, bool sample0OnScreen, bool sample1OnScreen, bool sample01OnScreen, FFX_MIN16_F3 ColorSample0, FFX_MIN16_F3 ColorSample1) + { + // NOTE: make sure exp has 32-bit precision + const FFX_MIN16_F2 fBoxSampleWeight = FFX_MIN16_F2( + exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq.x)), + exp(fRectificationCurveBias * FfxFloat32(fSrcSampleOffsetSq.y)) + ); + +#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq); +#else +#error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" +#endif + const FFX_MIN16_F2 fSampleWeight = Bool2ToFloat16x2(sample0OnScreen && sample01OnScreen, sample1OnScreen && sample01OnScreen) * LanczosUpsampleWeight; + + FFX_MIN16_F2 colorSampleB = FFX_MIN16_F2(ColorSample0.b, ColorSample1.b); + + aabbMinRG = ffxMin(aabbMinRG, ColorSample0.rg); + aabbMinRG = ffxMin(aabbMinRG, ColorSample1.rg); + aabbMinB = ffxMin(aabbMinB, colorSampleB); + + aabbMaxRG = ffxMax(aabbMaxRG, ColorSample0.rg); + aabbMaxRG = ffxMax(aabbMaxRG, ColorSample1.rg); + aabbMaxB = ffxMax(aabbMaxB, colorSampleB); + + FFX_MIN16_F2 weightedColorSampleRG0 = ColorSample0.rg * fBoxSampleWeight.x; + FFX_MIN16_F2 weightedColorSampleRG1 = ColorSample1.rg * fBoxSampleWeight.y; + FFX_MIN16_F2 weightedColorSampleB = colorSampleB * fBoxSampleWeight; + + boxCenterRG += 
weightedColorSampleRG0; + boxCenterRG += weightedColorSampleRG1; + boxCenterB += weightedColorSampleB; + + boxVecRG += ColorSample0.rg * weightedColorSampleRG0; + boxVecRG += ColorSample1.rg * weightedColorSampleRG1; + boxVecB += colorSampleB * weightedColorSampleB; + + fBoxCenterWeight += fBoxSampleWeight; + + fWeight += fSampleWeight; + fColorRG += (ColorSample0.rg * fSampleWeight.x) + (ColorSample1.rg * fSampleWeight.y); + fColorB += colorSampleB * fSampleWeight; + } + + void finalize(FFX_PARAMETER_INOUT RectificationBox rectificationBox, FFX_PARAMETER_INOUT FfxFloat32x4 outColorAndWeight) + { + rectificationBox.aabbMin.r = FfxFloat32(aabbMinRG.x); + rectificationBox.aabbMin.g = FfxFloat32(aabbMinRG.y); + rectificationBox.aabbMin.b = FfxFloat32(ffxMin(aabbMinB.x, aabbMinB.y)); + + rectificationBox.aabbMax.r = FfxFloat32(aabbMaxRG.x); + rectificationBox.aabbMax.g = FfxFloat32(aabbMaxRG.y); + rectificationBox.aabbMax.b = FfxFloat32(ffxMax(aabbMaxB.x, aabbMaxB.y)); + + rectificationBox.boxCenter.r = FfxFloat32(boxCenterRG.x); + rectificationBox.boxCenter.g = FfxFloat32(boxCenterRG.y); + rectificationBox.boxCenter.b = FfxFloat32(boxCenterB.x + boxCenterB.y); + + rectificationBox.boxVec.r = FfxFloat32(boxVecRG.x); + rectificationBox.boxVec.g = FfxFloat32(boxVecRG.y); + rectificationBox.boxVec.b = FfxFloat32(boxVecB.x + boxVecB.y); + + rectificationBox.fBoxCenterWeight = FfxFloat32(fBoxCenterWeight.x + fBoxCenterWeight.y); + + outColorAndWeight = FfxFloat32x4(fColorRG, fColorB.x + fColorB.y, fWeight.x + fWeight.y); + } +}; +#endif + +FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) +{ + // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) + FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position + FfxFloat32x2 fSrcOutputPos 
= fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position + FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... + +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + FFX_MIN16_F3 fSamples[iLanczos2SampleCount]; +#else + FfxFloat32x3 fSamples[iLanczos2SampleCount]; +#endif + + FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + + FfxInt32x2 offsetTL; + offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); + offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1); + + //Load samples + // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. + // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. + // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. + const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; + const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; + +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const FFX_MIN16_F2 fSampleOffsetX02 = __XB_AsHalf(bFlipCol ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + const FFX_MIN16_F2 fSampleOffsetY02 = __XB_AsHalf(bFlipRow ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + + typedef FfxInt32 FfxTexCoordI; + typedef FfxInt32x2 FfxTexCoordI2; + + const FfxTexCoordI2 iSrcSamplePosX01 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosX23 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? 
FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 iSrcSamplePosY01 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosY23 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 renderSizeLastTexelCoord = FfxTexCoordI2(RenderSize()) - FfxTexCoordI2(1, 1); + + const FfxTexCoordI2 iSrcSamplePosX01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX01.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX01.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosX23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX23.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX23.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosY01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY01.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY01.y, 0, renderSizeLastTexelCoord.y) + ); + + const FfxTexCoordI2 iSrcSamplePosY23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY23.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY23.y, 0, renderSizeLastTexelCoord.y) + ); + + fSamples[ 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.x)); + fSamples[ 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.x)); + fSamples[ 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.x)); + + fSamples[4 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.y)); + fSamples[4 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y)); + fSamples[4 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.y)); + + fSamples[8 + 0] = 
LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.x)); + fSamples[8 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.x)); + fSamples[8 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.x)); + + fSamples[12 + 0] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.y)); + fSamples[12 + 1] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.y)); + fSamples[12 + 2] = LoadPreparedInputColorHalf(FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.y)); + +#else + FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) { + + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) { + FfxInt32 iSampleIndex = col + (row << 2); + + FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; + + const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); + } + } +#endif + + FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + + FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); + + // Identify how much of each upsampled color to be used for this frame + const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); + + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); + const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, 
fKernelBiasFactor); + + const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); + +#if FFX_FSR2_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const bool coordX0OnScreen = iSrcSamplePosX01.x == iSrcSamplePosX01Clamped.x; + const bool coordX1OnScreen = iSrcSamplePosX01.y == iSrcSamplePosX01Clamped.y; + const bool coordX2OnScreen = iSrcSamplePosX23.x == iSrcSamplePosX23Clamped.x; + + const bool coordY0OnScreen = iSrcSamplePosY01.x == iSrcSamplePosY01Clamped.x; + const bool coordY1OnScreen = iSrcSamplePosY01.y == iSrcSamplePosY01Clamped.y; + const bool coordY2OnScreen = iSrcSamplePosY23.x == iSrcSamplePosY23Clamped.x; + + const FFX_MIN16_F2 fBaseSampleOffsetHalf = FFX_MIN16_F2(fBaseSampleOffset); + + const FFX_MIN16_F2 fSrcSampleOffsetX_02 = fBaseSampleOffsetHalf.xx + fSampleOffsetX02; + const FFX_MIN16_F2 fSrcSampleOffsetY_02 = fBaseSampleOffsetHalf.yy + fSampleOffsetY02; + + const FFX_MIN16_F2 fSrcSampleOffsetXSq_02 = fSrcSampleOffsetX_02 * fSrcSampleOffsetX_02; + const FFX_MIN16_F2 fSrcSampleOffsetYSq_02 = fSrcSampleOffsetY_02 * fSrcSampleOffsetY_02; + const FFX_MIN16_F2 fSrcSampleOffsetXYSq_11 = fBaseSampleOffsetHalf * fBaseSampleOffsetHalf; + + PairedRectificationBoxAndAccumulatedColorAndWeight pairedBox; + pairedBox.setKernelBiasAndRectificationCurveBias(fKernelBias, fRectificationCurveBias); + + // init by o o o + // o x o + // o o o + pairedBox.init( + fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y, + coordX1OnScreen, coordY1OnScreen, + fSamples[5] + ); + + // add remaining two samples from 1st row x o x + // o * o + // o o o + pairedBox.addSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx, + coordX0OnScreen, coordX2OnScreen, coordY0OnScreen, + fSamples[0 + 0], fSamples[0 + 2] + ); + + // add two samples from 2nd row * o * + // o * o + // x o x + pairedBox.addSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy, 
+ coordX0OnScreen, coordX2OnScreen, coordY2OnScreen, + fSamples[8 + 0], fSamples[8 + 2] + ); + + // add two samples from 3rd row * o * + // x * x + // * o * + pairedBox.addSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy, + coordX0OnScreen, coordX2OnScreen, coordY1OnScreen, + fSamples[4 + 0], fSamples[4 + 2] + ); + + // add remaining samples * x * + // * * * + // * x * + pairedBox.addSample( + fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02, + coordY0OnScreen, coordY2OnScreen, coordX1OnScreen, + fSamples[0 + 1], fSamples[8 + 1] + ); + + pairedBox.finalize(clippingBox, fColorAndWeight); +#else + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) { + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) { + FfxInt32 iSampleIndex = col + (row << 2); + + const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); + FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + + const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); + FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); + + // Update rectification box + { + const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); + + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } + } + } +#endif + + RectificationBoxComputeVarianceBoxData(clippingBox); + + fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); + + if (fColorAndWeight.w > FSR2_EPSILON) { + // 
Normalize for deringing (we need to compare colors) + fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; + fColorAndWeight.w *= fUpsampleLanczosWeightScale; + + Deringing(clippingBox, fColorAndWeight.xyz); + } + + return fColorAndWeight; +} + +#endif //!defined( FFX_FSR2_UPSAMPLE_H ) diff --git a/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_callbacks_hlsl.h new file mode 100644 index 000000000000..b8240604f9c3 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_callbacks_hlsl.h @@ -0,0 +1,24 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "../fsr2/ffx_fsr2_callbacks_hlsl.h" +#include "ffx_fsr3_resources.h" diff --git a/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_common.h b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_common.h new file mode 100644 index 000000000000..f8c935c7eeb8 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_common.h @@ -0,0 +1,25 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "../fsr2/ffx_fsr2_common.h" +#include "../frameinterpolation/ffx_frameinterpolation_common.h" +#include "../opticalflow/ffx_opticalflow_common.h" diff --git a/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_resources.h b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_resources.h new file mode 100644 index 000000000000..bb0614a4efbe --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3/ffx_fsr3_resources.h @@ -0,0 +1,51 @@ +// This file is part of the FidelityFX SDK. 
+// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_FSR3_RESOURCES_H +#define FFX_FSR3_RESOURCES_H + +#include "../fsr2/ffx_fsr2_resources.h" +#include "../frameinterpolation/ffx_frameinterpolation_resources.h" + +#if defined(FFX_CPU) || defined(FFX_GPU) +#define FFX_FSR3_RESOURCE_IDENTIFIER_NULL 0 + +#define FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_VECTOR 1 +#define FFX_FSR3_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_OUTPUT 2 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_0 5 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_0 6 +#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_0 7 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_1 8 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_1 9 +#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_1 10 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_2 11 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_2 12 +#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_2 13 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_DEPTH_3 14 +#define FFX_FSR3_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS_3 15 +#define FFX_FSR3_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH_3 16 + +#define FFX_FSR3_RESOURCE_IDENTIFIER_COUNT 17 +#define FFX_FSR3_RESOURCE_IDENTIFIER_UPSCALED_COUNT 3 +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR3_RESOURCES_H ) diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h new file mode 100644 index 000000000000..a8c6daa0e6cb --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h @@ -0,0 +1,172 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + // Avoid invalid values when the accumulated and upsampled weights are both 0: the summed weight is floored at FSR3UPSCALER_EPSILON so the fAlpha division below never divides by 0 + data.fHistoryWeight *= FfxFloat32(data.fHistoryWeight > FSR3UPSCALER_FP16_MIN); + data.fHistoryWeight = ffxMax(FSR3UPSCALER_EPSILON, data.fHistoryWeight + data.fUpsampledWeight); + +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + // YCoCg -> RGB -> Tonemap -> YCoCg (use the RGB tonemapper to avoid color desaturation) + data.fUpsampledColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fUpsampledColor))); + data.fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(data.fHistoryColor))); +#endif + + const FfxFloat32 fAlpha = ffxSaturate(data.fUpsampledWeight / data.fHistoryWeight); // share of the combined weight that comes from this frame's upsampled sample + data.fHistoryColor = ffxLerp(data.fHistoryColor, data.fUpsampledColor, fAlpha); + data.fHistoryColor = YCoCgToRGB(data.fHistoryColor); + +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + data.fHistoryColor = InverseTonemap(data.fHistoryColor); +#endif +} + +void RectifyHistory( + const AccumulationPassCommonParams params, + FFX_PARAMETER_INOUT AccumulationPassData data +) +{ // Pulls data.fHistoryColor toward the scaled clipping box when it lies outside that box + const FfxFloat32 f4kVelocityFactor = ffxSaturate(params.f4KVelocity / 20.0f); + const FfxFloat32 fDistanceFactor = ffxSaturate(0.75f - params.fFarthestDepthInMeters / 20.0f); + const FfxFloat32 fAccumulationFactor = 1.0f - params.fAccumulation; + const FfxFloat32 fReactiveFactor = ffxPow(params.fReactiveMask, 1.0f / 2.0f); + const FfxFloat32 fShadingChangeFactor = params.fShadingChange; + const FfxFloat32 fBoxScaleT = ffxMax(f4kVelocityFactor, ffxMax(fDistanceFactor, ffxMax(fAccumulationFactor, ffxMax(fReactiveFactor, fShadingChangeFactor)))); // largest of the five factors above + + const FfxFloat32 fBoxScale = ffxLerp(3.0f, 1.0f, fBoxScaleT); // fBoxScaleT == 1 selects the tightest box (scale 1.0), 0 the widest (scale 3.0) + const FfxFloat32x3 fScaledBoxVec = data.clippingBox.boxVec * FfxFloat32x3(1.7f, 1.0f, 1.0f) * fBoxScale; + + const FfxFloat32x3 fClampedScaledBoxVec = ffxMax(fScaledBoxVec, FfxFloat32x3(1.193e-7f, 1.193e-7f, 1.193e-7f)); // keeps the divisor used below away from 0 + const FfxFloat32x3 
fTransformedHistoryColor = (data.fHistoryColor - data.clippingBox.boxCenter) / fClampedScaledBoxVec; + + if (length(fTransformedHistoryColor)>1.f) { + const FfxFloat32x3 fClampedHistoryColor = normalize(fTransformedHistoryColor); + const FfxFloat32x3 fFinalClampedHistoryColor = (fClampedHistoryColor * fScaledBoxVec) + data.clippingBox.boxCenter; + + // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection + const FfxFloat32 fHistoryContribution = ffxMax(params.fLumaInstabilityFactor, data.fLockContributionThisFrame) * params.fAccumulation * (1 - params.fDisocclusion); + data.fHistoryColor = ffxLerp(fFinalClampedHistoryColor, data.fHistoryColor, ffxSaturate(fHistoryContribution)); // contribution 1 keeps the unclamped history, 0 takes the clamped color + } +} + +void UpdateLockStatus(AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + data.fLock *= FfxFloat32(params.bIsNewSample == false); // a new sample carries no lock over + + const FfxFloat32 fLifetimeDecreaseFactor = ffxMax(ffxSaturate(params.fShadingChange), ffxMax(params.fReactiveMask, params.fDisocclusion)); + data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecreaseFactor * fLockMax); + + // Compute this frame's lock contribution + data.fLockContributionThisFrame = ffxSaturate(ffxSaturate(data.fLock - fLockThreshold) * (fLockMax - fLockThreshold)); + + const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos) * (1.0f - ffxMax(params.fShadingChange * 0, params.fReactiveMask)); // NOTE(review): the `* 0` zeroes the shading-change term, so only fReactiveMask attenuates new locks here - presumably intentional upstream; confirm + data.fLock = ffxMax(0.0f, ffxMin(data.fLock + fNewLockIntensity, fLockMax)); + + // Preparing for next frame + const FfxFloat32 fLifetimeDecrease = (0.1f / JitterSequenceLength()) * (1.0f - fLifetimeDecreaseFactor); + data.fLock = ffxMax(0.0f, data.fLock - fLifetimeDecrease); + + // We expect similar motion for the next frame. + // Kill the lock if the estimated location is outside the screen, so that locks do not get clamped to the screen borders. + const FfxFloat32x2 fEstimatedUvNextFrame = params.fHrUv - params.fMotionVector; + data.fLock *= 
FfxFloat32(IsUvInside(fEstimatedUvNextFrame) == true); +} + +void ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + FfxFloat32 fBaseAccumulation = params.fAccumulation; + + fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, 0.15f, ffxSaturate(ffxMax(0.0f, (params.f4KVelocity * VelocityFactor()) / 0.5f)))); + + data.fHistoryWeight = fBaseAccumulation; +} + +void InitPassData(FfxInt32x2 iPxHrPos, FFX_PARAMETER_INOUT AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + // Init constant params + params.iPxHrPos = iPxHrPos; + const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / UpscaleSize(); // UV of the pixel center, relative to UpscaleSize() + params.fHrUv = fHrUv; + params.fLrUvJittered = fHrUv + Jitter() / RenderSize(); + params.fLrUv_HwSampler = ClampUv(params.fLrUvJittered, RenderSize(), MaxRenderSize()); + + params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv); + params.f4KVelocity = Get4KVelocity(params.fMotionVector); + + ComputeReprojectedUVs(params); + + const FfxFloat32x2 fLumaInstabilityUv_HW = ClampUv(fHrUv, RenderSize(), MaxRenderSize()); + params.fLumaInstabilityFactor = SampleLumaInstability(fLumaInstabilityUv_HW); + + const FfxFloat32x2 fFarthestDepthUv = ClampUv(params.fLrUvJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions()); + params.fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv); + params.bIsNewSample = (params.bIsExistingSample == false || 0 == FrameIndex()); // bIsExistingSample is not assigned in this function - presumably set by ComputeReprojectedUVs above; confirm + + const FfxFloat32x4 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler); + params.fReactiveMask = ffxSaturate(fDilatedReactiveMasks[REACTIVE]); + params.fDisocclusion = ffxSaturate(fDilatedReactiveMasks[DISOCCLUSION]); + params.fShadingChange = ffxSaturate(fDilatedReactiveMasks[SHADING_CHANGE]); + params.fAccumulation = ffxSaturate(fDilatedReactiveMasks[ACCUMULAION]); // sic: ACCUMULAION is the spelling used here; the constant is presumably declared under that name outside this file - do not "fix" it locally + params.fAccumulation *= FfxFloat32(round(params.fAccumulation * 100.0f) > 1.0f); // zeroes accumulation values that round to 1% or less + + 
// Init variable data + data.fUpsampledColor = FfxFloat32x3(0.0f, 0.0f, 0.0f); + data.fHistoryColor = FfxFloat32x3(0.0f, 0.0f, 0.0f); + data.fHistoryWeight = 1.0f; + data.fUpsampledWeight = 0.0f; + data.fLock = 0.0f; + data.fLockContributionThisFrame = 0.0f; +} + +void Accumulate(FfxInt32x2 iPxHrPos) // Per-pixel driver: init, optional history reprojection, lock update, base weight, upsample, rectify, blend, store +{ + AccumulationPassCommonParams params; + AccumulationPassData data; + InitPassData(iPxHrPos, params, data); + + if (params.bIsExistingSample && !params.bIsNewSample) { + ReprojectHistoryColor(params, data); + } + + UpdateLockStatus(params, data); + + ComputeBaseAccumulationWeight(params, data); + + ComputeUpsampledColorAndWeight(params, data); + + RectifyHistory(params, data); + + Accumulate(params, data); + + data.fHistoryColor /= Exposure(); + + data.fHistoryColor = ffxMax(data.fHistoryColor, FfxFloat32x3(0.0f, 0.0f, 0.0f)); + + StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(data.fHistoryColor, data.fLock)); // the lock value travels in the fourth component + + // Output final color when RCAS is disabled +#if FFX_FSR3UPSCALER_OPTION_APPLY_SHARPENING == 0 + StoreUpscaledOutput(iPxHrPos, data.fHistoryColor); +#endif + StoreNewLocks(iPxHrPos, 0); // reset this pixel's new-lock value to 0 +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h new file mode 100644 index 000000000000..a0014ddc8b02 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h @@ -0,0 +1,936 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_fsr3upscaler_resources.h" + +#if defined(FFX_GPU) +#include "../ffx_core.h" +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // FFX_PREFER_WAVE64 + +#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) + layout (set = 0, binding = FSR3UPSCALER_BIND_CB_FSR3UPSCALER, std140) uniform cbFSR3UPSCALER_t + { + FfxInt32x2 iRenderSize; + FfxInt32x2 iPreviousFrameRenderSize; + + FfxInt32x2 iUpscaleSize; + FfxInt32x2 iPreviousFrameUpscaleSize; + + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iMaxUpscaleSize; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fPreviousFrameJitter; + + FfxFloat32x2 fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + + FfxFloat32 fDeltaTime; + FfxFloat32 fDeltaPreExposure; + FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fFrameIndex; + + FfxFloat32 fVelocityFactor; + FfxFloat32 fReactivenessScale; + FfxFloat32 fShadingChangeScale; + FfxFloat32 fAccumulationAddedPerFrame; + FfxFloat32 fMinDisocclusionAccumulation; + + // GODOT BEGINS + // Do not change this to an array, as arrays have different alignment requirements + FfxFloat32 fPad1; + FfxFloat32 fPad2; + FfxFloat32 fPad3; + mat4 mReprojectionMatrix; + // GODOT ENDS + } cbFSR3Upscaler; + + +FfxInt32x2 RenderSize() +{ + return cbFSR3Upscaler.iRenderSize; +} + +FfxInt32x2 PreviousFrameRenderSize() +{ + return cbFSR3Upscaler.iPreviousFrameRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return cbFSR3Upscaler.iMaxRenderSize; +} + +FfxInt32x2 UpscaleSize() +{ + return cbFSR3Upscaler.iUpscaleSize; +} + +FfxInt32x2 PreviousFrameUpscaleSize() +{ + return cbFSR3Upscaler.iPreviousFrameUpscaleSize; +} + +FfxInt32x2 MaxUpscaleSize() +{ + return cbFSR3Upscaler.iMaxUpscaleSize; +} + +FfxFloat32x2 Jitter() +{ + return cbFSR3Upscaler.fJitter; +} + +FfxFloat32x2 PreviousFrameJitter() +{ + 
return cbFSR3Upscaler.fPreviousFrameJitter; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return cbFSR3Upscaler.fDeviceToViewDepth; +} + +FfxFloat32x2 MotionVectorScale() +{ + return cbFSR3Upscaler.fMotionVectorScale; +} + +FfxFloat32x2 DownscaleFactor() +{ + return cbFSR3Upscaler.fDownscaleFactor; +} + +FfxFloat32x2 MotionVectorJitterCancellation() +{ + return cbFSR3Upscaler.fMotionVectorJitterCancellation; +} + +FfxFloat32 TanHalfFoV() +{ + return cbFSR3Upscaler.fTanHalfFOV; +} + +FfxFloat32 JitterSequenceLength() +{ + return cbFSR3Upscaler.fJitterSequenceLength; +} + +FfxFloat32 DeltaTime() +{ + return cbFSR3Upscaler.fDeltaTime; +} + +FfxFloat32 DeltaPreExposure() +{ + return cbFSR3Upscaler.fDeltaPreExposure; +} + +FfxFloat32 ViewSpaceToMetersFactor() +{ + return cbFSR3Upscaler.fViewSpaceToMetersFactor; +} + +FfxFloat32 FrameIndex() +{ + return cbFSR3Upscaler.fFrameIndex; +} + +FfxFloat32 VelocityFactor() +{ + return cbFSR3Upscaler.fVelocityFactor; +} + +FfxFloat32 AccumulationAddedPerFrame() +{ + return cbFSR3Upscaler.fAccumulationAddedPerFrame; +} + +FfxFloat32 MinDisocclusionAccumulation() +{ + return cbFSR3Upscaler.fMinDisocclusionAccumulation; +} + +#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) + + +#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_AUTOREACTIVE, std140) uniform cbGenerateReactive_t +{ + FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +} cbGenerateReactive; + +FfxFloat32 TcThreshold() +{ + return cbGenerateReactive.fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return cbGenerateReactive.fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return cbGenerateReactive.fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return cbGenerateReactive.fReactiveMax; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) + +#if defined(FSR3UPSCALER_BIND_CB_RCAS) 
+layout(set = 0, binding = FSR3UPSCALER_BIND_CB_RCAS, std140) uniform cbRCAS_t +{ + FfxUInt32x4 rcasConfig; +} cbRCAS; + +FfxUInt32x4 RCASConfig() +{ + return cbRCAS.rcasConfig; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS) + + +#if defined(FSR3UPSCALER_BIND_CB_REACTIVE) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +} cbGenerateReactive; + +FfxFloat32 GenReactiveScale() +{ + return cbGenerateReactive.gen_reactive_scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return cbGenerateReactive.gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return cbGenerateReactive.gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return cbGenerateReactive.gen_reactive_flags; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE) + + +#if defined(FSR3UPSCALER_BIND_CB_SPD) +layout(set = 0, binding = FSR3UPSCALER_BIND_CB_SPD, std140) uniform cbSPD_t +{ + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +} cbSPD; + +FfxUInt32 MipCount() +{ + return cbSPD.mips; +} + +FfxUInt32 NumWorkGroups() +{ + return cbSPD.numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return cbSPD.workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return cbSPD.renderSize; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD) + +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+layout (set = 0, binding = 100 /*1000*/) uniform sampler s_PointClamp; +layout (set = 0, binding = 101 /*1001*/) uniform sampler s_LinearClamp; +// GODOT ENDS + +#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_SPD_MIPS) uniform texture2D r_spd_mips; + +FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel) +{ + return textureSize(r_spd_mips, int(uMipLevel)).xy; +} + +FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return textureLod(sampler2D(r_spd_mips, s_LinearClamp), fUV, float(mipLevel)).rg; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) uniform texture2D r_input_depth; + +FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) +{ + return texelFetch(r_input_depth, iPxPos, 0).r; +} + +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_input_depth, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) uniform texture2D r_reactive_mask; + +FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) +{ + // GODOT BEGINS +#if FFX_FSR3UPSCALER_OPTION_GODOT_REACTIVE_MASK_CLAMP + return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r * cbFSR3Upscaler.fReactivenessScale, 0.9f); +#else + return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r * cbFSR3Upscaler.fReactivenessScale; +#endif + // GODOT ENDS +} + +FfxInt32x2 GetReactiveMaskResourceDimensions() +{ + return textureSize(r_reactive_mask, 0).xy; +} + +FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_reactive_mask, s_LinearClamp), fUV, 0.0).x * cbFSR3Upscaler.fReactivenessScale; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) uniform texture2D r_transparency_and_composition_mask; + +FfxFloat32 
LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) +{ + return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r; +} + +FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions() +{ + return textureSize(r_transparency_and_composition_mask, 0).xy; +} + +FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_transparency_and_composition_mask, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color_jittered; + +FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos) +{ + return texelFetch(r_input_color_jittered, iPxPos, 0).rgb; +} + +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; + +FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) +{ + FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; + + // GODOT BEGINS +#if FFX_FSR3UPSCALER_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS + bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))); + if (bInvalidMotionVector) + { + FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos); + FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize(); + fSrcMotionVector = FFX_FSR_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR3Upscaler.mReprojectionMatrix); + } +#endif + // GODOT ENDS + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + +#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); +#endif + + return fUvMotionVector; +} +#endif + +#if 
defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) uniform texture2D r_internal_upscaled_color; + +FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory) +{ + return texelFetch(r_internal_upscaled_color, iPxHistory, 0); +} + +FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_internal_upscaled_color, s_LinearClamp), fUV, 0.0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_LUMA_HISTORY, rgba8) uniform image2D rw_luma_history; + +void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory) +{ + imageStore(rw_luma_history, iPxPos, fLumaHistory); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) uniform texture2D r_luma_history; + +FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos) +{ + return texelFetch(r_luma_history, iPxPos, 0); +} + +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_RCAS_INPUT) uniform texture2D r_rcas_input; + +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ + return texelFetch(r_rcas_input, iPxPos, 0); +} + +FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_rcas_input, s_LinearClamp), fUV, 0.0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED, rgba16f) writeonly uniform image2D rw_internal_upscaled_color; + +void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory) +{ + imageStore(rw_internal_upscaled_color, iPxHistory, fHistory); +} + +void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ + imageStore(rw_internal_upscaled_color, iPxPos, fColorAndWeight); +} 
+#endif + +#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D rw_upscaled_output; + +void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor) +{ + imageStore(rw_upscaled_output, iPxPos, FfxFloat32x4(fColor, 1.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_ACCUMULATION) uniform texture2D r_accumulation; + +FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_accumulation, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_ACCUMULATION, r8) uniform image2D rw_accumulation; + +void StoreAccumulation(FfxInt32x2 iPxPos, FfxFloat32 fAccumulation) +{ + imageStore(rw_accumulation, iPxPos, vec4(fAccumulation, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) uniform texture2D r_shading_change; + +FfxFloat32 LoadShadingChange(FfxInt32x2 iPxPos) +{ + return texelFetch(r_shading_change, iPxPos, 0).x * cbFSR3Upscaler.fShadingChangeScale; +} + +FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_shading_change, s_LinearClamp), fUV, 0.0).x * cbFSR3Upscaler.fShadingChangeScale; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SHADING_CHANGE, r8) uniform image2D rw_shading_change; + +void StoreShadingChange(FfxInt32x2 iPxPos, FfxFloat32 fShadingChange) +{ + imageStore(rw_shading_change, iPxPos, vec4(fShadingChange, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) uniform texture2D r_farthest_depth; + +FfxInt32x2 GetFarthestDepthResourceDimensions() +{ + return 
textureSize(r_farthest_depth, 0).xy; +} + +FfxFloat32 LoadFarthestDepth(FfxInt32x2 iPxPos) +{ + return texelFetch(r_farthest_depth, iPxPos, 0).x; +} + +FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_farthest_depth, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH, r16f) uniform image2D rw_farthest_depth; + +void StoreFarthestDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth) +{ + imageStore(rw_farthest_depth, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) uniform texture2D r_farthest_depth_mip1; + +FfxInt32x2 GetFarthestDepthMip1ResourceDimensions() +{ + return textureSize(r_farthest_depth_mip1, 0).xy; +} + +FfxFloat32 LoadFarthestDepthMip1(FfxInt32x2 iPxPos) +{ + return texelFetch(r_farthest_depth_mip1, iPxPos, 0).x; +} + +FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_farthest_depth_mip1, s_LinearClamp), fUV, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1, r16f) uniform image2D rw_farthest_depth_mip1; + +void StoreFarthestDepthMip1(FfxInt32x2 iPxPos, FfxFloat32 fDepth) +{ + imageStore(rw_farthest_depth_mip1, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) uniform texture2D r_current_luma; + +FfxFloat32 LoadCurrentLuma(FfxInt32x2 iPxPos) +{ + return texelFetch(r_current_luma, iPxPos, 0).r; +} + +FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv) +{ + return textureLod(sampler2D(r_current_luma, s_LinearClamp), uv, 0.0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_CURRENT_LUMA, r16f) 
uniform image2D rw_current_luma; + +void StoreCurrentLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma) +{ + imageStore(rw_current_luma, iPxPos, vec4(fLuma, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) uniform texture2D r_luma_instability; + +FfxFloat32 SampleLumaInstability(FfxFloat32x2 uv) +{ + return textureLod(sampler2D(r_luma_instability, s_LinearClamp), uv, 0.0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY, r16f) uniform image2D rw_luma_instability; + +void StoreLumaInstability(FfxInt32x2 iPxPos, FfxFloat32 fLumaInstability) +{ + imageStore(rw_luma_instability, iPxPos, vec4(fLumaInstability, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) uniform texture2D r_previous_luma; + +FfxFloat32 LoadPreviousLuma(FfxInt32x2 iPxPos) +{ + return texelFetch(r_previous_luma, iPxPos, 0).r; +} + +FfxFloat32 SamplePreviousLuma(FfxFloat32x2 uv) +{ + return textureLod(sampler2D(r_previous_luma, s_LinearClamp), uv, 0.0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_NEW_LOCKS) uniform texture2D r_new_locks; + +FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos) +{ + return texelFetch(r_new_locks, iPxPos, 0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_NEW_LOCKS, r8) uniform image2D rw_new_locks; + +FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos) +{ + return imageLoad(rw_new_locks, iPxPos).r; +} + +void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock) +{ + imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) 
uniform utexture2D r_reconstructed_previous_nearest_depth; + +FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos) +{ + return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D rw_reconstructed_previous_nearest_depth; + +void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth) +{ + FfxUInt32 uDepth = floatBitsToUint(fDepth); + + #if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); + #else + imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth + #endif +} + +void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue) +{ + imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_DEPTH, r32f) writeonly uniform image2D rw_dilated_depth; + +void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) +{ + imageStore(rw_dilated_depth, iPxPos, vec4(fDepth, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS, rg16f) writeonly uniform image2D rw_dilated_motion_vectors; + +void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) +{ + imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) uniform texture2D r_dilated_motion_vectors; + +FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput) 
+{ + return texelFetch(r_dilated_motion_vectors, iPxInput, 0).xy; +} + +FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0).xy; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) uniform texture2D r_dilated_depth; + +FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput) +{ + return texelFetch(r_dilated_depth, iPxInput, 0).r; +} + +FfxFloat32 SampleDilatedDepth(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_dilated_depth, s_LinearClamp), fUV, 0.0).r; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) uniform texture2D r_input_exposure; + +FfxFloat32 Exposure() +{ + FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x; + + if (exposure == 0.0) { + exposure = 1.0; + } + + return exposure; +} +#endif + +// BEGIN: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) uniform texture2D r_lanczos_lut; +#endif + +FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) + return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0, 0.5), 0.0).x; +#else + return 0.f; +#endif +} +// END: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) uniform texture2D r_dilated_reactive_masks; + +FfxFloat32x4 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) +{ + return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS, rgba8) writeonly uniform image2D rw_dilated_reactive_masks; + +void 
StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fDilatedReactiveMasks) +{ + imageStore(rw_dilated_reactive_masks, iPxPos, fDilatedReactiveMasks); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) +layout (set = 0, binding = FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) uniform texture2D r_input_opaque_only; + +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_opaque_only, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) uniform texture2D r_input_prev_color_pre_alpha; + +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) uniform texture2D r_input_prev_color_post_alpha; + +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && \ + defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) + +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_AUTOREACTIVE, r32f) uniform image2D rw_output_autoreactive; + +// GODOT BEGINS +// The binding is never used, so comment it out to avoid compilation errors. 
+// layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION, r32f) uniform image2D rw_output_autocomposition; +// +// void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +// { +// imageStore(rw_output_autoreactive, iPxPos, FfxFloat32x4(FfxFloat32(fReactive.x), 0.0, 0.0, 0.0)); +// +// imageStore(rw_output_autocomposition, iPxPos, FfxFloat32x4(FfxFloat32(fReactive.y), 0.0, 0.0, 0.0)); +// } +// GODOT ENDS +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_pre_alpha; + +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_pre_alpha, iPxPos, FfxFloat32x4(color, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR) +layout(set = 0, binding = FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f) uniform image2D rw_output_prev_color_post_alpha; + +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + imageStore(rw_output_prev_color_post_alpha, iPxPos, FfxFloat32x4(color, 0.0)); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO) +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_FRAME_INFO, rgba32f) uniform image2D rw_frame_info; + +FfxFloat32x4 LoadFrameInfo() +{ + return imageLoad(rw_frame_info, ivec2(0, 0)); +} + +void StoreFrameInfo(FfxFloat32x4 fInfo) +{ + imageStore(rw_frame_info, ivec2(0, 0), fInfo); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +layout(set = 0, binding = FSR3UPSCALER_BIND_SRV_FRAME_INFO) uniform texture2D r_frame_info; + +FfxFloat32x4 FrameInfo() +{ + return texelFetch(r_frame_info, ivec2(0, 0), 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) 
&& \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) + +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0, rg16f) uniform image2D rw_spd_mip0; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1, rg16f) uniform image2D rw_spd_mip1; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2, rg16f) uniform image2D rw_spd_mip2; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3, rg16f) uniform image2D rw_spd_mip3; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4, rg16f) uniform image2D rw_spd_mip4; +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5, rg16f) coherent uniform image2D rw_spd_mip5; + +FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define LOAD(idx) \ + if (index == idx) \ + { \ + return imageLoad(rw_spd_mip##idx, iPxPos).xy; \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + + return FfxFloat32x2(0.0, 0.0); + +#undef LOAD +} + +void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 outValue, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define STORE(idx) \ + if (index == idx) \ + { \ + imageStore(rw_spd_mip##idx, iPxPos, vec4(outValue, 0.0, 0.0)); \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + +#undef STORE +} +#endif + +#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC +layout (set = 0, binding = FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui) coherent uniform uimage2D rw_spd_global_atomic; + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ + spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0, 0), 1); +} + +void SPD_ResetAtomicCounter() +{ + imageStore(rw_spd_global_atomic, ivec2(0, 0), uvec4(0)); +} +#endif + +#endif // #if defined(FFX_GPU) diff --git 
a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h new file mode 100644 index 000000000000..ab3969ed5f76 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_hlsl.h @@ -0,0 +1,990 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_fsr3upscaler_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "../ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // FFX_PREFER_WAVE64 + +#pragma warning(disable: 3205) // conversion from larger type to smaller + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_FSR3UPSCALER_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_FSR3UPSCALER_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_FSR3UPSCALER_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +#if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) +cbuffer cbFSR3Upscaler : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) +{ + FfxInt32x2 iRenderSize; + FfxInt32x2 iPreviousFrameRenderSize; + + FfxInt32x2 iUpscaleSize; + FfxInt32x2 iPreviousFrameUpscaleSize; + + FfxInt32x2 iMaxRenderSize; + FfxInt32x2 iMaxUpscaleSize; + + FfxFloat32x4 fDeviceToViewDepth; + + FfxFloat32x2 fJitter; + FfxFloat32x2 fPreviousFrameJitter; + + FfxFloat32x2 fMotionVectorScale; + FfxFloat32x2 fDownscaleFactor; + + FfxFloat32x2 fMotionVectorJitterCancellation; + FfxFloat32 fTanHalfFOV; + FfxFloat32 fJitterSequenceLength; + + FfxFloat32 fDeltaTime; + FfxFloat32 fDeltaPreExposure; + FfxFloat32 fViewSpaceToMetersFactor; + FfxFloat32 fFrameIndex; + + FfxFloat32 fVelocityFactor; + FfxFloat32 fReactivenessScale; + FfxFloat32 fShadingChangeScale; + FfxFloat32 fAccumulationAddedPerFrame; + FfxFloat32 fMinDisocclusionAccumulation; +}; + +#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR3Upscaler) / 4) // Number of 32-bit values. 
This must be kept in sync with the cbFSR3Upscaler size. + +/* Define getter functions in the order they are defined in the CB! */ +FfxInt32x2 RenderSize() +{ + return iRenderSize; +} + +FfxInt32x2 PreviousFrameRenderSize() +{ + return iPreviousFrameRenderSize; +} + +FfxInt32x2 MaxRenderSize() +{ + return iMaxRenderSize; +} + +FfxInt32x2 UpscaleSize() +{ + return iUpscaleSize; +} + +FfxInt32x2 PreviousFrameUpscaleSize() +{ + return iPreviousFrameUpscaleSize; +} + +FfxInt32x2 MaxUpscaleSize() +{ + return iMaxUpscaleSize; +} + +FfxFloat32x2 Jitter() +{ + return fJitter; +} + +FfxFloat32x2 PreviousFrameJitter() +{ + return fPreviousFrameJitter; +} + +FfxFloat32x4 DeviceToViewSpaceTransformFactors() +{ + return fDeviceToViewDepth; +} + +FfxFloat32x2 MotionVectorScale() +{ + return fMotionVectorScale; +} + +FfxFloat32x2 DownscaleFactor() +{ + return fDownscaleFactor; +} + +FfxFloat32x2 MotionVectorJitterCancellation() +{ + return fMotionVectorJitterCancellation; +} + +FfxFloat32 TanHalfFoV() +{ + return fTanHalfFOV; +} + +FfxFloat32 JitterSequenceLength() +{ + return fJitterSequenceLength; +} + +FfxFloat32 DeltaTime() +{ + return fDeltaTime; +} + +FfxFloat32 DeltaPreExposure() +{ + return fDeltaPreExposure; +} + +FfxFloat32 ViewSpaceToMetersFactor() +{ + return fViewSpaceToMetersFactor; +} + +FfxFloat32 FrameIndex() +{ + return fFrameIndex; +} + +FfxFloat32 VelocityFactor() +{ + return fVelocityFactor; +} + +FfxFloat32 AccumulationAddedPerFrame() +{ + return fAccumulationAddedPerFrame; +} + +FfxFloat32 MinDisocclusionAccumulation() +{ + return fMinDisocclusionAccumulation; +} + +#endif // #if defined(FSR3UPSCALER_BIND_CB_FSR3UPSCALER) + +#define FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(p) FFX_FSR3UPSCALER_ROOTSIG_STR(p) +#define FFX_FSR3UPSCALER_ROOTSIG_STR(p) #p +#define FFX_FSR3UPSCALER_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, 
numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_FSR3UPSCALER_CONSTANT_BUFFER_2_SIZE 6 // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size. + +#define FFX_FSR3UPSCALER_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR3UPSCALER_ROOTSIG_STRINGIFY(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "CBV(b1), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] +#if defined(FFX_FSR3UPSCALER_EMBED_ROOTSIG) +#define FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT FFX_FSR3UPSCALER_ROOTSIG +#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR3UPSCALER_CB2_ROOTSIG +#else +#define 
FFX_FSR3UPSCALER_EMBED_ROOTSIG_CONTENT +#define FFX_FSR3UPSCALER_EMBED_CB2_ROOTSIG_CONTENT +#endif // #if FFX_FSR3UPSCALER_EMBED_ROOTSIG + +#if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) +cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) +{ + FfxFloat32 fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels + FfxFloat32 fTcScale; + FfxFloat32 fReactiveScale; + FfxFloat32 fReactiveMax; +}; + +FfxFloat32 TcThreshold() +{ + return fTcThreshold; +} + +FfxFloat32 TcScale() +{ + return fTcScale; +} + +FfxFloat32 ReactiveScale() +{ + return fReactiveScale; +} + +FfxFloat32 ReactiveMax() +{ + return fReactiveMax; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_AUTOREACTIVE) + +#if defined(FSR3UPSCALER_BIND_CB_RCAS) +cbuffer cbRCAS : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_RCAS) +{ + FfxUInt32x4 rcasConfig; +}; + +FfxUInt32x4 RCASConfig() +{ + return rcasConfig; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_RCAS) + + +#if defined(FSR3UPSCALER_BIND_CB_REACTIVE) +cbuffer cbGenerateReactive : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_REACTIVE) +{ + FfxFloat32 gen_reactive_scale; + FfxFloat32 gen_reactive_threshold; + FfxFloat32 gen_reactive_binaryValue; + FfxUInt32 gen_reactive_flags; +}; + +FfxFloat32 GenReactiveScale() +{ + return gen_reactive_scale; +} + +FfxFloat32 GenReactiveThreshold() +{ + return gen_reactive_threshold; +} + +FfxFloat32 GenReactiveBinaryValue() +{ + return gen_reactive_binaryValue; +} + +FfxUInt32 GenReactiveFlags() +{ + return gen_reactive_flags; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_REACTIVE) + +#if defined(FSR3UPSCALER_BIND_CB_SPD) +cbuffer cbSPD : FFX_FSR3UPSCALER_DECLARE_CB(FSR3UPSCALER_BIND_CB_SPD) { + + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32x2 renderSize; +}; + +FfxUInt32 MipCount() +{ + return mips; +} + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return 
workGroupOffset; +} + +FfxUInt32x2 SPD_RenderSize() +{ + return renderSize; +} +#endif // #if defined(FSR3UPSCALER_BIND_CB_SPD) + +SamplerState s_PointClamp : register(s0); +SamplerState s_LinearClamp : register(s1); + +#if defined(FSR3UPSCALER_BIND_SRV_SPD_MIPS) +Texture2D r_spd_mips : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SPD_MIPS); + +FfxInt32x2 GetSPDMipDimensions(FfxUInt32 uMipLevel) +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + FfxUInt32 uLevels; + r_spd_mips.GetDimensions(uMipLevel, uWidth, uHeight, uLevels); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32x2 SampleSPDMipLevel(FfxFloat32x2 fUV, FfxUInt32 mipLevel) +{ + return r_spd_mips.SampleLevel(s_LinearClamp, fUV, mipLevel); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH) +Texture2D r_input_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_DEPTH); + +FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos) +{ + return r_input_depth[iPxPos]; +} + +FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV) +{ + return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK) +Texture2D r_reactive_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_REACTIVE_MASK); + +FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos) +{ + return r_reactive_mask[iPxPos] * fReactivenessScale; +} + +FfxInt32x2 GetReactiveMaskResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_reactive_mask.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 SampleReactiveMask(FfxFloat32x2 fUV) +{ + return r_reactive_mask.SampleLevel(s_LinearClamp, fUV, 0).x * fReactivenessScale; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) +Texture2D r_transparency_and_composition_mask : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK); + +FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos) +{ + return 
r_transparency_and_composition_mask[iPxPos]; +} + +FfxInt32x2 GetTransparencyAndCompositionMaskResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_transparency_and_composition_mask.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 SampleTransparencyAndCompositionMask(FfxFloat32x2 fUV) +{ + return r_transparency_and_composition_mask.SampleLevel(s_LinearClamp, fUV, 0).x; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_COLOR) +Texture2D r_input_color_jittered : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_COLOR); + +FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos) +{ + return r_input_color_jittered[iPxPos].rgb; +} + +FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV) +{ + return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS) +Texture2D r_input_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS); + +FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos) +{ + FfxFloat32x2 fSrcMotionVector = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy; + + FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); + +#if FFX_FSR3UPSCALER_OPTION_JITTERED_MOTION_VECTORS + fUvMotionVector -= MotionVectorJitterCancellation(); +#endif + + return fUvMotionVector; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED) +Texture2D r_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED); + +FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory) +{ + return r_internal_upscaled_color[iPxHistory]; +} + +FfxFloat32x4 SampleHistory(FfxFloat32x2 fUV) +{ + return r_internal_upscaled_color.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY) +RWTexture2D rw_luma_history : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_HISTORY); + +void StoreLumaHistory(FfxUInt32x2 iPxPos, 
FfxFloat32x4 fLumaHistory) +{ + rw_luma_history[iPxPos] = fLumaHistory; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY) +Texture2D r_luma_history : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_HISTORY); + +FfxFloat32x4 LoadLumaHistory(FfxInt32x2 iPxPos) +{ + return r_luma_history[iPxPos]; +} + +FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV) +{ + return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RCAS_INPUT) +Texture2D r_rcas_input : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RCAS_INPUT); + +FfxFloat32x4 LoadRCAS_Input(FfxInt32x2 iPxPos) +{ + return r_rcas_input[iPxPos]; +} + +FfxFloat32x3 SampleRCAS_Input(FfxFloat32x2 fUV) +{ + return r_rcas_input.SampleLevel(s_LinearClamp, fUV, 0).rgb; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED) +RWTexture2D rw_internal_upscaled_color : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED); + +void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory) +{ + rw_internal_upscaled_color[iPxHistory] = fHistory; +} + +void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight) +{ + rw_internal_upscaled_color[iPxPos] = fColorAndWeight; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT) +RWTexture2D rw_upscaled_output : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT); + +void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) +{ + rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_ACCUMULATION) +Texture2D r_accumulation : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_ACCUMULATION); + +FfxFloat32 SampleAccumulation(FfxFloat32x2 fUV) +{ + return r_accumulation.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_ACCUMULATION) +RWTexture2D rw_accumulation : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_ACCUMULATION); + +void 
StoreAccumulation(FfxUInt32x2 iPxPos, FfxFloat32 fAccumulation) +{ + rw_accumulation[iPxPos] = fAccumulation; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE) +Texture2D r_shading_change : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_SHADING_CHANGE); + +FfxFloat32 LoadShadingChange(FfxUInt32x2 iPxPos) +{ + return r_shading_change[iPxPos] * fShadingChangeScale; +} + +FfxFloat32 SampleShadingChange(FfxFloat32x2 fUV) +{ + return r_shading_change.SampleLevel(s_LinearClamp, fUV, 0) * fShadingChangeScale; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE) +RWTexture2D rw_shading_change : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SHADING_CHANGE); + +void StoreShadingChange(FfxUInt32x2 iPxPos, FfxFloat32 fShadingChange) +{ + rw_shading_change[iPxPos] = fShadingChange; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH) +Texture2D r_farthest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH); + +FfxInt32x2 GetFarthestDepthResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_farthest_depth.GetDimensions(uWidth, uHeight); + + return FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 LoadFarthestDepth(FfxUInt32x2 iPxPos) +{ + return r_farthest_depth[iPxPos]; +} + +FfxFloat32 SampleFarthestDepth(FfxFloat32x2 fUV) +{ + return r_farthest_depth.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH) +RWTexture2D rw_farthest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH); + +void StoreFarthestDepth(FfxUInt32x2 iPxPos, FfxFloat32 fDepth) +{ + rw_farthest_depth[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1) +Texture2D r_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1); + +FfxInt32x2 GetFarthestDepthMip1ResourceDimensions() +{ + FfxUInt32 uWidth; + FfxUInt32 uHeight; + r_farthest_depth_mip1.GetDimensions(uWidth, uHeight); + + return 
FfxInt32x2(uWidth, uHeight); +} + +FfxFloat32 LoadFarthestDepthMip1(FfxUInt32x2 iPxPos) +{ + return r_farthest_depth_mip1[iPxPos]; +} + +FfxFloat32 SampleFarthestDepthMip1(FfxFloat32x2 fUV) +{ + return r_farthest_depth_mip1.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1) +RWTexture2D rw_farthest_depth_mip1 : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1); + +void StoreFarthestDepthMip1(FfxUInt32x2 iPxPos, FfxFloat32 fDepth) +{ + rw_farthest_depth_mip1[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA) +Texture2D r_current_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_CURRENT_LUMA); + +FfxFloat32 LoadCurrentLuma(FfxUInt32x2 iPxPos) +{ + return r_current_luma[iPxPos]; +} + +FfxFloat32 SampleCurrentLuma(FfxFloat32x2 uv) +{ + return r_current_luma.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA) +RWTexture2D rw_current_luma : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_CURRENT_LUMA); + +void StoreCurrentLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma) +{ + rw_current_luma[iPxPos] = fLuma; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY) +Texture2D r_luma_instability : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY); + +FfxFloat32 SampleLumaInstability(FfxFloat32x2 uv) +{ + return r_luma_instability.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY) +RWTexture2D rw_luma_instability : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY); + +void StoreLumaInstability(FfxUInt32x2 iPxPos, FfxFloat32 fLumaInstability) +{ + rw_luma_instability[iPxPos] = fLumaInstability; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA) +Texture2D r_previous_luma : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA); + +FfxFloat32 LoadPreviousLuma(FfxUInt32x2 iPxPos) +{ + return 
r_previous_luma[iPxPos]; +} + +FfxFloat32 SamplePreviousLuma(FfxFloat32x2 uv) +{ + return r_previous_luma.SampleLevel(s_LinearClamp, uv, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_NEW_LOCKS) +Texture2D r_new_locks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_NEW_LOCKS); + +FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos) +{ + return r_new_locks[iPxPos]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_NEW_LOCKS) +RWTexture2D rw_new_locks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_NEW_LOCKS); + +FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos) +{ + return rw_new_locks[iPxPos]; +} + +void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock) +{ + rw_new_locks[iPxPos] = newLock; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +Texture2D r_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + +FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos) +{ + return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) +RWTexture2D rw_reconstructed_previous_nearest_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH); + +void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth) +{ + FfxUInt32 uDepth = asuint(fDepth); + +#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); +#else + InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], uDepth); // min for standard, max for inverted depth +#endif +} + +void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue) +{ + rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH) +RWTexture2D rw_dilated_depth : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_DEPTH); + +void 
StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth) +{ + rw_dilated_depth[iPxPos] = fDepth; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS) +RWTexture2D rw_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS); + +void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector) +{ + rw_dilated_motion_vectors[iPxPos] = fMotionVector; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS) +Texture2D r_dilated_motion_vectors : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS); + +FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput) +{ + return r_dilated_motion_vectors[iPxInput]; +} + +FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV) +{ + return r_dilated_motion_vectors.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH) +Texture2D r_dilated_depth : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_DEPTH); + +FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput) +{ + return r_dilated_depth[iPxInput]; +} + +FfxFloat32 SampleDilatedDepth(FfxFloat32x2 fUV) +{ + return r_dilated_depth.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE) +Texture2D r_input_exposure : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE); + +FfxFloat32 Exposure() +{ + FfxFloat32 exposure = r_input_exposure[FfxUInt32x2(0, 0)].x; + +#if defined(__XBOX_SCARLETT) + if (exposure < 0.000030517578/** 2^-15 */) { + exposure = 1.0f; + } +#else + if (exposure == 0.0f) { + exposure = 1.0f; + } +#endif // #if defined(__XBOX_SCARLETT) + + return exposure; +} +#endif + +// BEGIN: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) +Texture2D r_lanczos_lut : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT); +#endif + +FfxFloat32 
SampleLanczos2Weight(FfxFloat32 x) +{ +#if defined(FSR3UPSCALER_BIND_SRV_LANCZOS_LUT) + return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); +#else + return 0.f; +#endif +} +// END: FSR3UPSCALER_BIND_SRV_LANCZOS_LUT + +#if defined(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS) +Texture2D r_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS); + +FfxFloat32x4 SampleDilatedReactiveMasks(FfxFloat32x2 fUV) +{ + return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS) +RWTexture2D rw_dilated_reactive_masks : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS); + +void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fDilatedReactiveMasks) +{ + rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY) +Texture2D r_input_opaque_only : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY); + +FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_opaque_only[iPxPos].xyz; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_PRE_ALPHA_COLOR) +Texture2D r_input_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR); + +FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_pre_alpha[iPxPos]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_PREV_POST_ALPHA_COLOR) +Texture2D r_input_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_SRV(FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR); + +FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos) +{ + return r_input_prev_color_post_alpha[iPxPos]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE) && \ + defined(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION) + +RWTexture2D 
rw_output_autoreactive : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOREACTIVE); +RWTexture2D rw_output_autocomposition : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION); + +void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) +{ + rw_output_autoreactive[iPxPos] = fReactive.x; + + rw_output_autocomposition[iPxPos] = fReactive.y; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR) +RWTexture2D rw_output_prev_color_pre_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_PRE_ALPHA_COLOR); + +void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_pre_alpha[iPxPos] = color; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR) +RWTexture2D rw_output_prev_color_post_alpha : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_PREV_POST_ALPHA_COLOR); + +void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color) +{ + rw_output_prev_color_post_alpha[iPxPos] = color; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_FRAME_INFO) +RWTexture2D rw_frame_info : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_FRAME_INFO); + +FfxFloat32x4 LoadFrameInfo() +{ + return rw_frame_info[FfxInt32x2(0, 0)]; +} + +void StoreFrameInfo(FfxFloat32x4 fInfo) +{ + rw_frame_info[FfxInt32x2(0, 0)] = fInfo; +} +#endif + +#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO) +Texture2D r_frame_info : FFX_FSR3UPSCALER_DECLARE_SRV(FSR3UPSCALER_BIND_SRV_FRAME_INFO); + +FfxFloat32x4 FrameInfo() +{ + return r_frame_info[FfxInt32x2(0, 0)]; +} +#endif + +#if defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4) && \ + defined(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5) + 
+RWTexture2D rw_spd_mip0 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0); +RWTexture2D rw_spd_mip1 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1); +RWTexture2D rw_spd_mip2 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2); +RWTexture2D rw_spd_mip3 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3); +RWTexture2D rw_spd_mip4 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4); +globallycoherent RWTexture2D rw_spd_mip5 : FFX_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5); + +FfxFloat32x2 RWLoadPyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define LOAD(idx) \ + if (index == idx) \ + { \ + return rw_spd_mip##idx[iPxPos]; \ + } + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + LOAD(4); + LOAD(5); + + return 0; + +#undef LOAD +} + +void StorePyramid(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 outValue, FFX_PARAMETER_IN FfxUInt32 index) +{ +#define STORE(idx) \ + if (index == idx) \ + { \ + rw_spd_mip##idx[iPxPos] = outValue; \ + } + + STORE(0); + STORE(1); + STORE(2); + STORE(3); + STORE(4); + STORE(5); + +#undef STORE +} +#endif + +#if defined FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC +globallycoherent RWTexture2D rw_spd_global_atomic : FFX_FSR3UPSCALER_DECLARE_UAV(FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC); + +void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) +{ + InterlockedAdd(rw_spd_global_atomic[FfxInt32x2(0, 0)], 1, spdCounter); +} + +void SPD_ResetAtomicCounter() +{ + rw_spd_global_atomic[FfxInt32x2(0, 0)] = 0; +} +#endif + +#endif // #if defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_common.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_common.h new file mode 100644 index 000000000000..87aa596db4fa --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_common.h @@ -0,0 +1,403 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#if !defined(FFX_FSR3UPSCALER_COMMON_H) // include guard; the matching #endif is the last line of this header
+#define FFX_FSR3UPSCALER_COMMON_H
+
+#if defined(FFX_GPU) // the rest of the header is only compiled when FFX_GPU is defined
+#pragma warning(error : 3206) // treat vector truncation warnings as errors
+#pragma warning(disable : 3205) // conversion from larger type to smaller
+#pragma warning(disable : 3571) // in ffxPow(f, e), f could be negative
+
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MIN = 6.10e-05f; // roughly 2^-14
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP16_MAX = 65504.0f;
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_EPSILON = FSR3UPSCALER_FP16_MIN; // general-purpose epsilon, aliased to the FP16 minimum above
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_TONEMAP_EPSILON = FSR3UPSCALER_FP16_MIN; // lower bound for the divisor in InverseTonemap()
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP32_MAX = 3.402823466e+38f; // same value as FLT_MAX
+FFX_STATIC const FfxFloat32 FSR3UPSCALER_FP32_MIN = 1.175494351e-38f; // same value as FLT_MIN
+
+// Reconstructed depth usage
+FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = FSR3UPSCALER_EPSILON * 10;
+
+FfxFloat32 ReconstructedDepthMvPxThreshold(FfxFloat32 fNearestDepthInMeters) // returns a value that ramps linearly from 0.25 (depth 0) to 0.75 (depth >= 100)
+{
+    return ffxLerp(0.25f, 0.75f, ffxSaturate(fNearestDepthInMeters / 100.0f)); // depth / 100 is saturated to [0, 1] before the lerp
+}
+
+// Accumulation
+FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 16.0f;
+FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
+FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
+
+#define SHADING_CHANGE_SET_SIZE 5
+FFX_STATIC const FfxInt32 iShadingChangeMipStart = 0;
+FFX_STATIC const FfxFloat32 fShadingChangeSamplePow = 1.0f / 1.0f; // evaluates to 1.0
+
+
+FFX_STATIC const FfxFloat32 fLockThreshold = 1.0f;
+FFX_STATIC const FfxFloat32 fLockMax = 2.0f;
+
+FFX_STATIC const FfxInt32 REACTIVE = 0; // REACTIVE..ACCUMULAION: channel indices applied to the FfxFloat32x4 returned by SampleDilatedReactiveMasks()
+FFX_STATIC const FfxInt32 DISOCCLUSION = 1;
+FFX_STATIC const FfxInt32 SHADING_CHANGE = 2;
+FFX_STATIC const FfxInt32 ACCUMULAION = 3; // NOTE(review): spelling is "ACCUMULAION" (sic); the luma instability pass references this exact identifier, so a rename has to touch every user
+
+FFX_STATIC const FfxInt32 FRAME_INFO_EXPOSURE = 0; // FRAME_INFO_*: channel indices into the FfxFloat32x4 frame-info value (see FrameInfo() / LoadFrameInfo())
+FFX_STATIC const FfxInt32 FRAME_INFO_LOG_LUMA = 1;
+FFX_STATIC const FfxInt32 FRAME_INFO_SCENE_AVERAGE_LUMA = 2;
+
+// True only for frame index 0 (the local switch below is hard-wired to enabled).
+FfxBoolean TonemapFirstFrame()
+{
+    const FfxBoolean bEnabled = true;
+    const FfxBoolean bIsFirstFrame = (FrameIndex() == 0);
+    return bIsFirstFrame && bEnabled;
+}
+
+// Unscaled average lanczos weight (0.74), i.e. without fUpsampleLanczosWeightScale applied.
+FfxFloat32 AverageLanczosWeightPerFrame()
+{
+    return 0.74f;
+}
+
+// Half of RenderSize(), converted to integers.
+FfxInt32x2 ShadingChangeRenderSize()
+{
+    return FfxInt32x2(RenderSize() * 0.5f);
+}
+
+// Half of MaxRenderSize(), converted to integers.
+FfxInt32x2 ShadingChangeMaxRenderSize()
+{
+    return FfxInt32x2(MaxRenderSize() * 0.5f);
+}
+
+// Half of PreviousFrameRenderSize(), converted to integers.
+FfxInt32x2 PreviousFrameShadingChangeRenderSize()
+{
+    return FfxInt32x2(PreviousFrameRenderSize() * 0.5f);
+}
+
+#if defined(FSR3UPSCALER_BIND_SRV_FRAME_INFO)
+// Reads the FRAME_INFO_SCENE_AVERAGE_LUMA channel of the frame-info value.
+FfxFloat32 SceneAverageLuma()
+{
+    const FfxFloat32x4 fFrameInfo = FrameInfo();
+    return fFrameInfo[FRAME_INFO_SCENE_AVERAGE_LUMA];
+}
+#endif
+
+// Auto exposure
+FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e4f;
+
+// Per-pixel values shared by the accumulation pass helpers.
+struct AccumulationPassCommonParams
+{
+    FfxInt32x2 iPxHrPos;
+    FfxFloat32x2 fHrUv;
+    FfxFloat32x2 fLrUvJittered;
+    FfxFloat32x2 fLrUv_HwSampler;
+    FfxFloat32x2 fMotionVector;
+    FfxFloat32x2 fReprojectedHrUv;
+    FfxFloat32 f4KVelocity;
+    FfxFloat32 fDisocclusion;
+    FfxFloat32 fReactiveMask;
+    FfxFloat32 fShadingChange;
+    FfxFloat32 fAccumulation;
+    FfxFloat32 fLumaInstabilityFactor;
+    FfxFloat32 fFarthestDepthInMeters;
+
+    FfxBoolean bIsExistingSample;
+    FfxBoolean bIsNewSample;
+};
+
+// Length of the motion vector after scaling its components by 3840 x 2160.
+FfxFloat32 Get4KVelocity(FfxFloat32x2 fMotionVector)
+{
+    const FfxFloat32x2 f4KExtent = FfxFloat32x2(3840.0f, 2160.0f);
+    return length(fMotionVector * f4KExtent);
+}
+
+// Running colour statistics: weighted sum / weighted sum of squares plus min/max bounds.
+struct RectificationBox
+{
+    FfxFloat32x3 boxCenter;
+    FfxFloat32x3 boxVec;
+    FfxFloat32x3 aabbMin;
+    FfxFloat32x3 aabbMax;
+    FfxFloat32 fBoxCenterWeight;
+};
+
+struct AccumulationPassData
+{
+    RectificationBox clippingBox;
+    FfxFloat32x3 fUpsampledColor;
+    FfxFloat32 fUpsampledWeight;
+    FfxFloat32x3 fHistoryColor;
+    FfxFloat32 fHistoryWeight;
+    FfxFloat32 fLock;
+    FfxFloat32 fLockContributionThisFrame;
+};
+
+// Seeds the box with its first sample: bounds collapse onto the sample, sums start at the weighted sample.
+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
+
+    rectificationBox.aabbMin = colorSample;
+    rectificationBox.aabbMax = colorSample;
+    rectificationBox.boxCenter = fWeighted;
+    rectificationBox.boxVec = colorSample * fWeighted;
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+}
+
+// Adds a sample to the box; when bInitialSample is set the box is (re)initialised from the sample instead.
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    if (bInitialSample) {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+        return;
+    }
+
+    const FfxFloat32x3 fWeighted = colorSample * fSampleWeight;
+
+    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+    rectificationBox.boxCenter += fWeighted;
+    rectificationBox.boxVec += colorSample * fWeighted;
+    rectificationBox.fBoxCenterWeight += fSampleWeight;
+}
+
+// Normalises the accumulated sums by the total weight and replaces boxVec with the
+// per-channel standard deviation. A total weight whose magnitude is not above
+// FSR3UPSCALER_FP32_MIN is replaced by 1 before dividing.
+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    FfxFloat32 fSafeWeight = FfxFloat32(1.f);
+    if (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR3UPSCALER_FP32_MIN)) {
+        fSafeWeight = rectificationBox.fBoxCenterWeight;
+    }
+
+    rectificationBox.fBoxCenterWeight = fSafeWeight;
+    rectificationBox.boxCenter /= fSafeWeight;
+    rectificationBox.boxVec /= fSafeWeight;
+    rectificationBox.boxVec = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
+}
+
+// Component-wise reciprocal; yields the zero vector unless every component of v is non-zero.
+FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
+{
+    if (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) {
+        return FfxFloat32x3(1, 1, 1) / v;
+    }
+    return FfxFloat32x3(0, 0, 0);
+}
+
+// min(v0, v1) / max(v0, v1), or fOnZeroReturnValue when the larger value is 0.
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1, const FfxFloat32 fOnZeroReturnValue)
+{
+    const FfxFloat32 fLarger = ffxMax(v0, v1);
+    if (fLarger != 0) {
+        return ffxMin(v0, v1) / fLarger;
+    }
+    return fOnZeroReturnValue;
+}
+
+// Same as the three-argument overload with 0 as the fallback.
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
+{
+    return MinDividedByMax(v0, v1, 0);
+}
+
+// YCoCg -> RGB.
+FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
+{
+    return FfxFloat32x3(
+        fYCoCg.x + fYCoCg.y - fYCoCg.z,
+        fYCoCg.x + fYCoCg.z,
+        fYCoCg.x - fYCoCg.y - fYCoCg.z);
+}
+
+// RGB -> YCoCg.
+FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
+{
+    return FfxFloat32x3(
+        0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b,
+        0.5f * fRgb.r - 0.5f * fRgb.b,
+        -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b);
+}
+
+// Weighted channel sum with weights (0.2126, 0.7152, 0.0722).
+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
+{
+    const FfxFloat32x3 fLumaWeights = FfxFloat32x3(0.2126f, 0.7152f, 0.0722f);
+    return dot(fLinearRgb, fLumaWeights);
+}
+
+// Piecewise (linear below 216/24389, cube-root above) remap of RGBToLuma(), scaled by 0.01.
+FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
+{
+    const FfxFloat32 fLuma = RGBToLuma(fLinearRgb);
+
+    FfxFloat32 fPerceived = 0;
+    if (fLuma <= 216.0f / 24389.0f) {
+        fPerceived = fLuma * (24389.0f / 27.0f);
+    }
+    else {
+        fPerceived = ffxPow(fLuma, 1.0f / 3.0f) * 116.0f - 16.0f;
+    }
+
+    return fPerceived * 0.01f;
+}
+
+// Divides the colour by (1 + largest channel), the largest channel being floored at 0.
+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
+{
+    const FfxFloat32 fPeak = ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b));
+    return fRgb / (fPeak + 1.f).xxx;
+}
+
+// Divides the colour by (1 - largest channel), the divisor being floored at FSR3UPSCALER_TONEMAP_EPSILON.
+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
+{
+    const FfxFloat32 fPeak = ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b));
+    return fRgb / ffxMax(FSR3UPSCALER_TONEMAP_EPSILON, 1.f - fPeak).xxx;
+}
+
+// True when both UV components lie in [0, 1] (bounds inclusive).
+FfxBoolean IsUvInside(FfxFloat32x2 fUv)
+{
+    const FfxBoolean bInsideX = (fUv.x >= 0.0f && fUv.x <= 1.0f);
+    const FfxBoolean bInsideY = (fUv.y >= 0.0f && fUv.y <= 1.0f);
+    return bInsideX && bInsideY;
+}
+
+// iPxSample + iPxOffset, clamped per component to [0, iTextureSize - 1].
+FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{
+    FfxInt32x2 iPxClamped = iPxSample + iPxOffset;
+    iPxClamped.x = ffxMax(0, ffxMin(iPxClamped.x, iTextureSize.x - 1));
+    iPxClamped.y = ffxMax(0, ffxMin(iPxClamped.y, iTextureSize.y - 1));
+    return iPxClamped;
+}
+
+// Scales fUv by iTextureSize, clamps the result to [0.5, iTextureSize - 0.5] and
+// divides it by iResourceSize.
+FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
+{
+    const FfxFloat32x2 fHalfTexel = FfxFloat32x2(0.5f, 0.5f);
+    const FfxFloat32x2 fPxLocation = fUv * iTextureSize;
+    const FfxFloat32x2 fPxClamped = ffxMax(fHalfTexel, ffxMin(fPxLocation, FfxFloat32x2(iTextureSize) - fHalfTexel));
+
+    return fPxClamped / FfxFloat32x2(iResourceSize);
+}
+
+// True when 0 <= pos < size on both axes; negative components fail because both
+// sides are compared after conversion to unsigned.
+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
+{
+    const FfxUInt32x2 uPos = FfxUInt32x2(pos);
+    const FfxUInt32x2 uSize = FfxUInt32x2(size);
+    return all(FFX_LESS_THAN(uPos, uSize));
+}
+
+// Converts a log-average luminance into an exposure value (1 / Lmax).
+FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
+{
+    const FfxFloat32 S = 100.0f; //ISO arithmetic speed
+    const FfxFloat32 K = 12.5f;
+    const FfxFloat32 q = 0.65f;
+
+    Lavg = exp(Lavg);
+
+    const FfxFloat32 fExposureIso100 = log2((Lavg * S) / K);
+    const FfxFloat32 fLmax = (78.0f / (q * S)) * ffxPow(2.0f, fExposureIso100);
+
+    return 1.0f / fLmax;
+}
+
+// Maps a render-resolution pixel (centre, minus Jitter()) to the upscaled-resolution pixel containing it.
+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
+{
+    const FfxFloat32x2 fLrJittered = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter();
+    const FfxFloat32x2 fHrLocation = (fLrJittered / RenderSize()) * UpscaleSize();
+    return FfxInt32x2(floor(fHrLocation));
+}
+
+// Pixel position -> [-1, 1] coordinates with the y axis flipped: (0, 0) maps to (-1, 1).
+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
+{
+    const FfxFloat32x2 fNormalized = fPxPos / FfxFloat32x2(iSize);
+    return fNormalized * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f);
+}
+
+// Device depth -> view-space depth using factors [0] and [1] of DeviceToViewSpaceTransformFactors().
+FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
+{
+    // fDeviceToViewDepth details found in ffx_fsr3upscaler.cpp
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+
+    return (fFactors[1] / (fDeviceDepth - fFactors[0]));
+}
+
+// GetViewSpaceDepth() scaled by ViewSpaceToMetersFactor().
+FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32 fViewDepth = GetViewSpaceDepth(fDeviceDepth);
+    return fViewDepth * ViewSpaceToMetersFactor();
+}
+
+// Reconstructs the view-space position of a viewport pixel from its device depth.
+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fFactors = DeviceToViewSpaceTransformFactors();
+    const FfxFloat32 fViewZ = GetViewSpaceDepth(fDeviceDepth);
+    const FfxFloat32x2 fNdc = ComputeNdc(iViewportPos, iViewportSize);
+
+    const FfxFloat32 fViewX = fFactors[2] * fNdc.x * fViewZ;
+    const FfxFloat32 fViewY = fFactors[3] * fNdc.y * fViewZ;
+
+    return FfxFloat32x3(fViewX, fViewY, fViewZ);
+}
+
+// GetViewSpacePosition() scaled by ViewSpaceToMetersFactor().
+FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x3 fViewPos = GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth);
+    return fViewPos * ViewSpaceToMetersFactor();
+}
+
+// View-space depth in meters at device depth 0 (inverted depth) or 1 (standard depth).
+FfxFloat32 GetMaxDistanceInMeters()
+{
+#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH
+    return GetViewSpaceDepthInMeters(0.0f);
+#else
+    return GetViewSpaceDepthInMeters(1.0f);
+#endif
+}
+
+
+// The four texel offsets and weights of one bilinear tap, relative to iBasePos.
+struct BilinearSamplingData
+{
+    FfxInt32x2 iOffsets[4];
+    FfxFloat32 fWeights[4];
+    FfxInt32x2 iBasePos;
+};
+
+// Computes base texel, 2x2 offsets and bilinear weights for sampling a texture of iSize texels at fUv.
+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
+{
+    BilinearSamplingData data;
+
+    const FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
+    const FfxFloat32x2 fFrac = ffxFract(fPxSample);
+    const FfxFloat32 fInvFracX = (1 - fFrac.x);
+    const FfxFloat32 fInvFracY = (1 - fFrac.y);
+
+    data.iBasePos = FfxInt32x2(floor(fPxSample));
+
+    data.iOffsets[0] = FfxInt32x2(0, 0);
+    data.fWeights[0] = fInvFracX * fInvFracY;
+
+    data.iOffsets[1] = FfxInt32x2(1, 0);
+    data.fWeights[1] = (fFrac.x) * fInvFracY;
+
+    data.iOffsets[2] = FfxInt32x2(0, 1);
+    data.fWeights[2] = fInvFracX * (fFrac.y);
+
+    data.iOffsets[3] = FfxInt32x2(1, 1);
+    data.fWeights[3] = (fFrac.x) * (fFrac.y);
+
+    return data;
+}
+
+// Plane as unit normal plus signed offset, so that dot(fNormal, p) + fDistanceFromOrigin == 0 for points p on the plane.
+struct PlaneData
+{
+    FfxFloat32x3 fNormal;
+    FfxFloat32 fDistanceFromOrigin;
+};
+
+// Builds the plane through three points; the normal is normalize(cross(fP0 - fP1, fP0 - fP2)).
+PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
+{
+    const FfxFloat32x3 fEdgeA = fP0 - fP1;
+    const FfxFloat32x3 fEdgeB = fP0 - fP2;
+
+    PlaneData plane;
+    plane.fNormal = normalize(cross(fEdgeA, fEdgeB));
+    plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal);
+
+    return plane;
+}
+
+// Unsigned distance from fPoint to the plane.
+FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
+{
+    const FfxFloat32 fSignedDistance = dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin;
+    return abs(fSignedDistance);
+}
+
+#endif // #if defined(FFX_GPU)
+
+#endif //!defined(FFX_FSR3UPSCALER_COMMON_H)
diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_debug_view.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_debug_view.h
new file mode
100644 index 000000000000..6f4fa33fe3d7 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_debug_view.h @@ -0,0 +1,159 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+// Rectangle of the upscaled output (in pixels) that one debug visualisation is drawn into.
+struct FfxDebugViewport
+{
+    FfxInt32x2 offset;
+    FfxInt32x2 size;
+};
+
+// Macro to cull and draw debug viewport
+#define DRAW_VIEWPORT(function, pos, vp)    \
+    {                                       \
+        if (pointIsInsideViewport(pos, vp)) \
+        {                                   \
+            function(pos, vp);              \
+        }                                   \
+    }
+
+// UV of the pixel centre relative to the viewport rectangle.
+FfxFloat32x2 getTransformedUv(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxInt32x2 iPxLocal = iPxPos - vp.offset;
+    return (FfxFloat32x2(iPxLocal) + 0.5f) / vp.size;
+}
+
+// Encodes a motion vector as 0.5 + mv * RenderSize() * 0.5 in the first two channels; the third channel is 0.5.
+FfxFloat32x3 getMotionVectorColor(FfxFloat32x2 fMotionVector)
+{
+    const FfxFloat32x2 fEncoded = 0.5f + fMotionVector * RenderSize() * 0.5f;
+    return FfxFloat32x3(fEncoded, 0.5f);
+}
+
+// Red along the viewport diagonal (x == y * aspect ratio), black elsewhere; alpha is 1.
+FfxFloat32x4 getUnusedIndicationColor(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxInt32x2 iPxLocal = iPxPos - vp.offset;
+    const FfxFloat32 fAspect = FfxFloat32(vp.size.x) / FfxFloat32(vp.size.y);
+
+    return FfxFloat32x4(iPxLocal.x == FfxInt32(iPxLocal.y * fAspect), 0, 0, 1);
+}
+
+// Writes the colour-coded dilated motion vector to the upscaled output.
+void drawDilatedMotionVectors(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxFloat32x2 fClampedUv = ClampUv(getTransformedUv(iPxPos, vp), RenderSize(), MaxRenderSize());
+    const FfxFloat32x2 fMotionVector = SampleDilatedMotionVector(fClampedUv);
+
+    StoreUpscaledOutput(iPxPos, getMotionVectorColor(fMotionVector));
+}
+
+// Writes the saturated DISOCCLUSION channel of the dilated reactive masks as green.
+void drawDisocclusionMask(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxFloat32x2 fClampedUv = ClampUv(getTransformedUv(iPxPos, vp), RenderSize(), MaxRenderSize());
+    const FfxFloat32 fDisocclusion = ffxSaturate(SampleDilatedReactiveMasks(fClampedUv)[DISOCCLUSION]);
+
+    StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fDisocclusion, 0));
+}
+
+// Writes the saturated REACTIVE channel of the dilated reactive masks as green.
+void drawDetailProtectionTakedown(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxFloat32x2 fClampedUv = ClampUv(getTransformedUv(iPxPos, vp), RenderSize(), MaxRenderSize());
+    const FfxFloat32 fTakedown = ffxSaturate(SampleDilatedReactiveMasks(fClampedUv)[REACTIVE]);
+
+    StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fTakedown, 0));
+}
+
+// Writes the saturated SHADING_CHANGE channel of the dilated reactive masks as green.
+void drawReactiveness(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxFloat32x2 fClampedUv = ClampUv(getTransformedUv(iPxPos, vp), RenderSize(), MaxRenderSize());
+    const FfxFloat32 fShadingChange = ffxSaturate(SampleDilatedReactiveMasks(fClampedUv)[SHADING_CHANGE]);
+
+    StoreUpscaledOutput(iPxPos, FfxFloat32x3(0, fShadingChange, 0));
+}
+
+// Writes saturate(history.w - fLockThreshold) as red; the history is sampled with the unclamped viewport UV.
+void drawProtectedAreas(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxFloat32x2 fViewportUv = getTransformedUv(iPxPos, vp);
+    const FfxFloat32 fLockAmount = ffxSaturate(SampleHistory(fViewportUv).w - fLockThreshold);
+
+    StoreUpscaledOutput(iPxPos, FfxFloat32x3(fLockAmount, 0, 0));
+}
+
+// Writes the dilated depth, converted to meters and divided by 25 (saturated), as red.
+void drawDilatedDepthInMeters(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxFloat32x2 fClampedUv = ClampUv(getTransformedUv(iPxPos, vp), RenderSize(), MaxRenderSize());
+    const FfxFloat32 fDepthMeters = GetViewSpaceDepthInMeters(SampleDilatedDepth(fClampedUv));
+
+    StoreUpscaledOutput(iPxPos, FfxFloat32x3(ffxSaturate(fDepthMeters / 25.0f), 0, 0));
+}
+
+// True when iPxPos lies in [offset, offset + size) on both axes.
+FfxBoolean pointIsInsideViewport(FfxInt32x2 iPxPos, FfxDebugViewport vp)
+{
+    const FfxInt32x2 iPxEnd = vp.offset + vp.size;
+    const FfxBoolean bInsideX = (iPxPos.x >= vp.offset.x && iPxPos.x < iPxEnd.x);
+    const FfxBoolean bInsideY = (iPxPos.y >= vp.offset.y && iPxPos.y < iPxEnd.y);
+
+    return bInsideX && bInsideY;
+}
+
+// Splits the upscaled output into a 3x3 grid and draws one visualisation into each
+// cell of the top and bottom rows; the middle row is not drawn into.
+void DebugView(FfxInt32x2 iPxPos)
+{
+#define VIEWPORT_GRID_SIZE_X 3
+#define VIEWPORT_GRID_SIZE_Y 3
+
+    const FfxFloat32x2 fCellScale = FfxFloat32x2(1.0f / VIEWPORT_GRID_SIZE_X, 1.0f / VIEWPORT_GRID_SIZE_Y);
+    const FfxInt32x2 iCellSize = FfxInt32x2(UpscaleSize() * fCellScale);
+
+    // compute grid [y][x] for easier placement of viewports
+    FfxDebugViewport vp[VIEWPORT_GRID_SIZE_Y][VIEWPORT_GRID_SIZE_X];
+    for (FfxInt32 iRow = 0; iRow < VIEWPORT_GRID_SIZE_Y; iRow++)
+    {
+        for (FfxInt32 iCol = 0; iCol < VIEWPORT_GRID_SIZE_X; iCol++)
+        {
+            vp[iRow][iCol].offset = iCellSize * FfxInt32x2(iCol, iRow);
+            vp[iRow][iCol].size = iCellSize;
+        }
+    }
+
+    // top row
+    DRAW_VIEWPORT(drawDilatedMotionVectors, iPxPos, vp[0][0]);
+    DRAW_VIEWPORT(drawProtectedAreas, iPxPos, vp[0][1]);
+    DRAW_VIEWPORT(drawDilatedDepthInMeters, iPxPos, vp[0][2]);
+
+    // bottom row
+    DRAW_VIEWPORT(drawDisocclusionMask, iPxPos, vp[2][0]);
+    DRAW_VIEWPORT(drawReactiveness, iPxPos, vp[2][1]);
+    DRAW_VIEWPORT(drawDetailProtectionTakedown, iPxPos, vp[2][2]);
+}
diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h
new file mode 100644
index 000000000000..624b7ca941be
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h
@@ -0,0 +1,115 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Four-frame luma history (index 0 = most recent) together with the resulting instability factor.
+struct LumaInstabilityFactorData
+{
+    FfxFloat32x4 fLumaHistory;
+    FfxFloat32 fLumaInstabilityFactor;
+};
+
+// Compares the current luma against the four history entries, then shifts the
+// current luma into the history and divides the history by Exposure().
+// The factor is 1 when an older entry (differing from the current luma in the same
+// direction as the newest entry does) is more similar to the current luma than the
+// newest entry is, and the oldest stored entry is non-zero after the shift; otherwise 0.
+// fFarthestDepthInMeters is not read by this function.
+LumaInstabilityFactorData ComputeLumaInstabilityFactor(LumaInstabilityFactorData data, FfxFloat32 fCurrentFrameLuma, FfxFloat32 fFarthestDepthInMeters)
+{
+    const FfxInt32 N_MINUS_1 = 0;
+    const FfxInt32 N_MINUS_2 = 1;
+    const FfxInt32 N_MINUS_3 = 2;
+    const FfxInt32 N_MINUS_4 = 3;
+
+    const FfxFloat32 fNewestDiff = (fCurrentFrameLuma - data.fLumaHistory[N_MINUS_1]);
+    const FfxFloat32 fNewestSimilarity = MinDividedByMax(fCurrentFrameLuma, data.fLumaHistory[N_MINUS_1], 1.0f);
+
+    FfxFloat32 fInstability = 0.0f;
+    FfxFloat32 fBestSimilarity = fNewestSimilarity;
+
+    if (fNewestSimilarity < 1.0f) {
+        for (int iFrame = N_MINUS_2; iFrame <= N_MINUS_4; iFrame++) {
+            const FfxFloat32 fOlderDiff = (fCurrentFrameLuma - data.fLumaHistory[iFrame]);
+            const FfxFloat32 fOlderSimilarity = MinDividedByMax(fCurrentFrameLuma, data.fLumaHistory[iFrame]);
+
+            // only history entries on the same side of the current luma as the newest entry count
+            if (sign(fNewestDiff) == sign(fOlderDiff)) {
+                fBestSimilarity = ffxMax(fBestSimilarity, fOlderSimilarity);
+            }
+        }
+
+        fInstability = FfxFloat32(fBestSimilarity > fNewestSimilarity);
+    }
+
+    // Shift history
+    data.fLumaHistory[N_MINUS_4] = data.fLumaHistory[N_MINUS_3];
+    data.fLumaHistory[N_MINUS_3] = data.fLumaHistory[N_MINUS_2];
+    data.fLumaHistory[N_MINUS_2] = data.fLumaHistory[N_MINUS_1];
+    data.fLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
+
+    data.fLumaHistory /= Exposure();
+
+    const FfxFloat32 fHistoryIsFull = FfxFloat32(data.fLumaHistory[N_MINUS_4] != 0);
+    data.fLumaInstabilityFactor = fInstability * fHistoryIsFull;
+
+    return data;
+}
+
+// Per-pixel entry point: reprojects the luma history, updates it and stores both the
+// history and the instability factor. Pixels whose reprojected UV is outside [0, 1], or
+// whose ACCUMULAION mask channel is not above 0.9, store a zeroed history and factor.
+void LumaInstability(FfxInt32x2 iPxPos)
+{
+    LumaInstabilityFactorData data;
+    data.fLumaHistory = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);
+    data.fLumaInstabilityFactor = 0.0f;
+
+    const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iPxPos);
+    const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize();
+    const FfxFloat32x2 fUvCurrFrameJittered = fUv + Jitter() / RenderSize();
+    const FfxFloat32x2 fUvPrevFrameJittered = fUv + PreviousFrameJitter() / PreviousFrameRenderSize();
+    const FfxFloat32x2 fReprojectedUv = fUvPrevFrameJittered + fDilatedMotionVector;
+
+    if (IsUvInside(fReprojectedUv))
+    {
+        // one clamped UV serves both the reactive-mask sample and the current-luma sample
+        const FfxFloat32x2 fUvCurr_HW = ClampUv(fUvCurrFrameJittered, RenderSize(), MaxRenderSize());
+
+        const FfxFloat32x4 fMasks = SampleDilatedReactiveMasks(fUvCurr_HW);
+        const FfxFloat32 fReactiveMask = ffxSaturate(fMasks[REACTIVE]);
+        const FfxFloat32 fDisocclusion = ffxSaturate(fMasks[DISOCCLUSION]);
+        const FfxFloat32 fShadingChange = ffxSaturate(fMasks[SHADING_CHANGE]);
+        const FfxFloat32 fAccumulation = ffxSaturate(fMasks[ACCUMULAION]);
+
+        if (fAccumulation > 0.9f) {
+            const FfxFloat32 fCurrentFrameLuma = SampleCurrentLuma(fUvCurr_HW) * Exposure();
+
+            const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize());
+            data.fLumaHistory = SampleLumaHistory(fReprojectedUv_HW) * DeltaPreExposure() * Exposure();
+
+            const FfxFloat32x2 fFarthestDepthUv_HW = ClampUv(fUvCurrFrameJittered, RenderSize() / 2, GetFarthestDepthMip1ResourceDimensions());
+            const FfxFloat32 fFarthestDepthInMeters = SampleFarthestDepthMip1(fFarthestDepthUv_HW);
+
+            data = ComputeLumaInstabilityFactor(data, fCurrentFrameLuma, fFarthestDepthInMeters);
+
+            // attenuate by motion (reaches 0 at a 4K velocity of 20) and by each of the masks
+            const FfxFloat32 fVelocityWeight = 1.0f - ffxSaturate(Get4KVelocity(fDilatedMotionVector) / 20.0f);
+            data.fLumaInstabilityFactor *= fVelocityWeight * (1.0f - fDisocclusion) * (1.0f - fReactiveMask) * (1.0f - fShadingChange);
+        }
+    }
+
+    StoreLumaHistory(iPxPos, data.fLumaHistory);
+    StoreLumaInstability(iPxPos, data.fLumaInstabilityFactor);
+}
diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h
b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h new file mode 100644 index 000000000000..e8a8c496227b --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h @@ -0,0 +1,192 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) /* SPD callback: forwards to SPD_IncreaseAtomicCounter(spdCounter); slice is not read. */ +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) /* SPD callback: forwards to SPD_ResetAtomicCounter(); slice is not read. */ +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FFX_STATIC const FfxInt32 LOG_LUMA = 0; /* Component indices used by SpdLoadSourceImage and SpdStore below. */ +FFX_STATIC const FfxInt32 LUMA = 1; +FFX_STATIC const FfxInt32 DEPTH_IN_METERS = 2; + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) /* Packs log-luma, luma and farthest depth of the clamped pixel into components LOG_LUMA, LUMA and DEPTH_IN_METERS; w stays 0 and slice is not read. */ +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + const FfxInt32x2 iPxSamplePos = ClampLoad(FfxInt32x2(iPxPos), FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + const FfxFloat32 fLuma = LoadCurrentLuma(iPxSamplePos); + const FfxFloat32 fLogLuma = ffxMax(FSR3UPSCALER_EPSILON, log(fLuma)); /* log(fLuma) is floored at FSR3UPSCALER_EPSILON. */ + const FfxFloat32 fFarthestDepthInMeters = LoadFarthestDepth(iPxSamplePos); + + FfxFloat32x4 fOutput = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + fOutput[LOG_LUMA] = fLogLuma; + fOutput[LUMA] = fLuma; + fOutput[DEPTH_IN_METERS] = fFarthestDepthInMeters; + + return fOutput; +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) /* Reads RWLoadPyramid(tex, 5) and pads the remaining components with 0; slice is not read. */ +{ + return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) /* Arithmetic mean of the four inputs. */ +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) /* index 5 stores outValue.xy via StorePyramid, index 0 stores the depth component via StoreFarthestDepthMip1; the last mip (MipCount() - 1) additionally refreshes the frame info at pixel (0, 0). */ +{ + if (index == 5) + { + StorePyramid(pix, outValue.xy, index); + } + else if (index == 0) { + StoreFarthestDepthMip1(pix, outValue[DEPTH_IN_METERS]); + } + + if (index == MipCount() - 1) { //accumulate on 1x1 level + + if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0)))) + { +
FfxFloat32x4 frameInfo = LoadFrameInfo(); + const FfxFloat32 fSceneAvgLuma = outValue[LUMA]; + const FfxFloat32 fPrevLogLuma = frameInfo[FRAME_INFO_LOG_LUMA]; + FfxFloat32 fLogLuma = outValue[LOG_LUMA]; + + if (fPrevLogLuma < resetAutoExposureAverageSmoothing) // Compare Lavg, so small or negative values + { + fLogLuma = fPrevLogLuma + (fLogLuma - fPrevLogLuma) * (1.0f - exp(-DeltaTime())); + fLogLuma = ffxMax(0.0f, fLogLuma); + } + + frameInfo[FRAME_INFO_EXPOSURE] = ComputeAutoExposureFromLavg(fLogLuma); + frameInfo[FRAME_INFO_LOG_LUMA] = fLogLuma; + frameInfo[FRAME_INFO_SCENE_AVERAGE_LUMA] = fSceneAvgLuma; + + StoreFrameInfo(frameInfo); + } + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} + +#endif + +// Define the packed (FFX_HALF) fetch and store functions +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) /* Stub: always returns zero. */ +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) /* Stub: always returns zero. */ +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) /* Stub: empty body, nothing is stored. */ +{ +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1,
FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "../spd/ffx_spd.h" + +void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) /* Entry point: calls SpdDownsampleH when FFX_HALF is set, otherwise SpdDownsample, with the same arguments. */ +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h new file mode 100644 index 000000000000..59c765b52722 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h @@ -0,0 +1,152 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector) /* Writes fDepth to each of the four bilinear footprint pixels of the reprojected position whose weight exceeds fReconstructedDepthBilinearWeightThreshold and that is on screen. */ +{ + const FfxFloat32 fNearestDepthInMeters = ffxMin(GetViewSpaceDepthInMeters(fDepth), FSR3UPSCALER_FP16_MAX); + const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters); + + // Discard small mvs + fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold); + + const FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / RenderSize(); + const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); + + // Project current depth into previous frame locations. + // Push to all pixels having some contribution if reprojection is using bilinear logic.
+ for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset; + if (IsOnScreen(iStorePos, RenderSize())) { + StoreReconstructedDepth(iStorePos, fDepth); + } + } + } +} + +struct DepthExtents /* Result of FindDepthExtents: nearest depth, the pixel it came from, and farthest depth. */ +{ + FfxFloat32 fNearest; + FfxInt32x2 fNearestCoord; + FfxFloat32 fFarthest; +}; + +DepthExtents FindDepthExtents(FFX_PARAMETER_IN FfxInt32x2 iPxPos) /* Scans the 3x3 neighbourhood of iPxPos and returns the nearest depth with its coordinate plus the farthest depth; off-screen neighbours are skipped and the centre sample seeds all fields. */ +{ + DepthExtents extents; + const FfxInt32 iSampleCount = 9; + const FfxInt32x2 iSampleOffsets[iSampleCount] = { + FfxInt32x2(+0, +0), + FfxInt32x2(+1, +0), + FfxInt32x2(+0, +1), + FfxInt32x2(+0, -1), + FfxInt32x2(-1, +0), + FfxInt32x2(-1, +1), + FfxInt32x2(+1, +1), + FfxInt32x2(-1, -1), + FfxInt32x2(+1, -1), + }; + + // pull out the depth loads to allow SC to batch them + FfxFloat32 depth[9]; + FfxInt32 iSampleIndex = 0; + FFX_UNROLL + for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + + FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + depth[iSampleIndex] = LoadInputDepth(iPos); + } + + // find closest and farthest depth + extents.fNearestCoord = iPxPos; + extents.fNearest = depth[0]; + extents.fFarthest = depth[0]; + FFX_UNROLL + for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) { + + const FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex]; + if (IsOnScreen(iPos, RenderSize())) { + + FfxFloat32 fNdDepth = depth[iSampleIndex]; +#if FFX_FSR3UPSCALER_OPTION_INVERTED_DEPTH + extents.fFarthest = ffxMin(extents.fFarthest, fNdDepth); /* Tracked for every on-screen sample: inside the nearest test below this update could never change fFarthest. */ + if (fNdDepth > extents.fNearest) { +#else + extents.fFarthest = ffxMax(extents.fFarthest, fNdDepth); /* Tracked for every on-screen sample: inside the nearest test below this update could never change fFarthest. */ + if (fNdDepth < extents.fNearest) { +#endif + extents.fNearestCoord = iPos; + extents.fNearest = fNdDepth; + } + } + } + + return extents; +} + +FfxFloat32x2 DilateMotionVector(FfxInt32x2
iPxPos, const DepthExtents depthExtents) /* Loads the input motion vector at the nearest-depth coordinate (mapped through ComputeHrPosFromLrPos unless low-resolution motion vectors are enabled); iSamplePos is not read. */ +{ +#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS + const FfxInt32x2 iSamplePos = iPxPos; + const FfxInt32x2 iMotionVectorPos = depthExtents.fNearestCoord; +#else + const FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos); + const FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(depthExtents.fNearestCoord); +#endif + + const FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos); + + return fDilatedMotionVector; +} + +FfxFloat32 GetCurrentFrameLuma(FfxInt32x2 iPxPos) /* Luma of the input colour at iPxPos, with negative channels clamped to 0 first. */ +{ + //We assume linear data. if non-linear input (sRGB, ...), + //then we should convert to linear first and back to sRGB on output. + const FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxPos)); + const FfxFloat32 fLuma = RGBToLuma(fRgb); + + return fLuma; +} + +void PrepareInputs(FfxInt32x2 iPxPos) /* Per-pixel entry point: stores the dilated motion vector, the nearest depth, the farthest depth in meters (capped at FSR3UPSCALER_FP16_MAX) and the current luma, and scatters the nearest depth via ReconstructPrevDepth. */ +{ + const DepthExtents depthExtents = FindDepthExtents(iPxPos); + const FfxFloat32x2 fDilatedMotionVector = DilateMotionVector(iPxPos, depthExtents); + + ReconstructPrevDepth(iPxPos, depthExtents.fNearest, fDilatedMotionVector); + + StoreDilatedMotionVector(iPxPos, fDilatedMotionVector); + StoreDilatedDepth(iPxPos, depthExtents.fNearest); + + const FfxFloat32 fFarthestDepthInMeters = ffxMin(GetViewSpaceDepthInMeters(depthExtents.fFarthest), FSR3UPSCALER_FP16_MAX); + StoreFarthestDepth(iPxPos, fFarthestDepthInMeters); + + const FfxFloat32 fLuma = GetCurrentFrameLuma(iPxPos); + StoreCurrentLuma(iPxPos, fLuma); +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h new file mode 100644 index 000000000000..ae24545c8005 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h @@ -0,0 +1,283 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +FfxFloat32 ComputeDisocclusions(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthViewSpace) /* Returns a saturated disocclusion estimate from the reconstructed previous depth at the reprojected position; the result is 0 as soon as one weighted sample has fDepthDifference <= FSR3UPSCALER_FP32_MIN. */ +{ + const FfxFloat32 fNearestDepthInMeters = ffxMin(fCurrentDepthViewSpace * ViewSpaceToMetersFactor(), FSR3UPSCALER_FP16_MAX); + const FfxFloat32 fReconstructedDeptMvThreshold = ReconstructedDepthMvPxThreshold(fNearestDepthInMeters); + + fMotionVector *= FfxFloat32(Get4KVelocity(fMotionVector) > fReconstructedDeptMvThreshold); /* Zeroes motion vectors at or below the threshold. */ + + const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + const BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize()); + + FfxFloat32 fDisocclusion = 0.0f; + FfxFloat32 fWeightSum = 0.0f; + FfxBoolean bPotentialDisocclusion = true; + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4 && bPotentialDisocclusion; iSampleIndex++) + { + + const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex]; + const FfxInt32x2 iSamplePos = ClampLoad(bilinearInfo.iBasePos, iOffset, FfxInt32x2(RenderSize())); + + if (IsOnScreen(iSamplePos, RenderSize())) { + const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex]; + if (fWeight > fReconstructedDepthBilinearWeightThreshold) { + + const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(LoadReconstructedPrevDepth(iSamplePos)); + const FfxFloat32 fDepthDifference = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace; + + bPotentialDisocclusion = bPotentialDisocclusion && (fDepthDifference > FSR3UPSCALER_FP32_MIN); + + if (bPotentialDisocclusion) { + const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()) * 0.5f); /* Despite the name, this is the length of the half render-size vector. */ + const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace); + + const FfxFloat32 Ksep = 1.37e-05f; + const FfxFloat32 fRequiredDepthSeparation = Ksep * fHalfViewportWidth * fDepthThreshold; + + fDisocclusion += ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDifference)) * fWeight; + fWeightSum += fWeight; + } + } + } + } + + fDisocclusion = (bPotentialDisocclusion &&
fWeightSum > 0) ? ffxSaturate(1.0f - fDisocclusion / fWeightSum) : 0.0f; + + return fDisocclusion; +} + +FfxFloat32 ComputeMotionDivergence(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fCurrentDepthSample) /* Compares this pixel's motion and depth with the dilated motion and depth stored at the reprojected pixel. */ +{ + const FfxInt32x2 iPxReprojectedPos = FfxInt32x2((fUv + fMotionVector) * RenderSize()); + const FfxFloat32 fReprojectedDepth = LoadDilatedDepth(iPxReprojectedPos); + const FfxFloat32x2 fReprojectedMotionVector = LoadDilatedMotionVector(iPxReprojectedPos); + + const FfxFloat32 fReprojectedVelocity = Get4KVelocity(fReprojectedMotionVector); + const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector); + + const FfxFloat32 fMaxLen = max(length(fMotionVector), length(fReprojectedMotionVector)); /* NOTE(review): fMaxLen is not read below. */ + + const FfxFloat32 fNucleusDepthInMeters = GetViewSpaceDepthInMeters(fReprojectedDepth); + const FfxFloat32 fCurrentDepthInMeters = GetViewSpaceDepthInMeters(fCurrentDepthSample); + + const FfxFloat32 fDistanceFactor = MinDividedByMax(fNucleusDepthInMeters, fCurrentDepthInMeters); + const FfxFloat32 fVelocityFactor = ffxSaturate(f4KVelocity / 10.0f); + const FfxFloat32 fMotionVectorFieldConfidence = (1.0f - ffxSaturate(fReprojectedVelocity / f4KVelocity)) * fDistanceFactor * fVelocityFactor; /* NOTE(review): presumably f4KVelocity is 0 for a zero motion vector, which makes this a division by zero -- confirm how the result is absorbed. */ + + return fMotionVectorFieldConfidence; +} + +FfxFloat32 DilateReactiveMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv) /* Maximum of LoadReactiveMask over the clamped 3x3 neighbourhood of iPxPos; fUv is not read. */ +{ + FfxFloat32 fDilatedReactiveMasks = 0.0f; + + FFX_UNROLL + for (FfxInt32 y = -1; y <=1; y++) + { + FFX_UNROLL + for (FfxInt32 x = -1; x <= 1; x++) + { + const FfxInt32x2 sampleCoord = ClampLoad(iPxPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize())); + fDilatedReactiveMasks = ffxMax(fDilatedReactiveMasks, LoadReactiveMask(sampleCoord)); + } + } + + return fDilatedReactiveMasks; +} + +FfxFloat32 DilateTransparencyAndCompositionMasks(FfxInt32x2 iPxPos, FfxFloat32x2 fUv) /* Takes a single sample of the mask at the clamped UV; iPxPos is not read and no neighbourhood is visited in this body. */ +{ + const FfxFloat32x2 fUvTransparencyAndCompositionMask = ClampUv(fUv, RenderSize(), GetTransparencyAndCompositionMaskResourceDimensions()); + return
SampleTransparencyAndCompositionMask(fUvTransparencyAndCompositionMask); +} + +FfxFloat32 ComputeThinFeatureConfidence(FfxInt32x2 iPxPos) /* Returns 0 unless the nucleus luma lies outside the range of its dissimilar neighbours and no 2x2 corner containing the nucleus is entirely similar; otherwise returns 1 - fLumaMin / fLumaMax. */ +{ + /* Sample indices of the 3x3 neighbourhood (0 is the nucleus): + 1 2 3 + 4 0 5 + 6 7 8 + */ + + const FfxInt32 iNucleusIndex = 0; + const FfxInt32 iSampleCount = 9; + const FfxInt32x2 iSampleOffsets[iSampleCount] = { + FfxInt32x2(+0, +0), + FfxInt32x2(-1, -1), + FfxInt32x2(+0, -1), + FfxInt32x2(+1, -1), + FfxInt32x2(-1, +0), + FfxInt32x2(+1, +0), + FfxInt32x2(-1, +1), + FfxInt32x2(+0, +1), + FfxInt32x2(+1, +1), + }; + + FfxFloat32 fSamples[iSampleCount]; + + FfxFloat32 fLumaMin = FSR3UPSCALER_FP32_MAX; + FfxFloat32 fLumaMax = FSR3UPSCALER_FP32_MIN; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) { + const FfxInt32x2 iPxSamplePos = ClampLoad(iPxPos, iSampleOffsets[iSampleIndex], FfxInt32x2(RenderSize())); + fSamples[iSampleIndex] = LoadCurrentLuma(iPxSamplePos) * Exposure(); + + fLumaMin = ffxMin(fLumaMin, fSamples[iSampleIndex]); + fLumaMax = ffxMax(fLumaMax, fSamples[iSampleIndex]); + } + + const FfxFloat32 fThreshold = 0.9f; + FfxFloat32 fDissimilarLumaMin = FSR3UPSCALER_FP32_MAX; + FfxFloat32 fDissimilarLumaMax = 0; + +#define SETBIT(x) (1U << x) + + FfxUInt32 uPatternMask = SETBIT(iNucleusIndex); // Flag nucleus as similar + + const FfxUInt32 uNumRejectionMasks = 4; + const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = { + SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(iNucleusIndex), // Upper left + SETBIT(2) | SETBIT(3) | SETBIT(5) | SETBIT(iNucleusIndex), // Upper right + SETBIT(4) | SETBIT(6) | SETBIT(7) | SETBIT(iNucleusIndex), // Lower left + SETBIT(5) | SETBIT(7) | SETBIT(8) | SETBIT(iNucleusIndex) // Lower right + }; + + FfxInt32 iBitIndex = 1; + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex, ++iBitIndex) { + + const FfxFloat32 fDifference = abs(fSamples[iSampleIndex] - fSamples[iNucleusIndex]) / (fLumaMax - fLumaMin); /* NOTE(review): this is 0 / 0 when all nine samples are equal -- confirm the comparison below handles that as intended. */ + + if (fDifference < fThreshold) + { + uPatternMask |=
SETBIT(iBitIndex); + } + else + { + fDissimilarLumaMin = ffxMin(fDissimilarLumaMin, fSamples[iSampleIndex]); + fDissimilarLumaMax = ffxMax(fDissimilarLumaMax, fSamples[iSampleIndex]); + } + } + + const FfxBoolean bIsRidge = fSamples[iNucleusIndex] > fDissimilarLumaMax || fSamples[iNucleusIndex] < fDissimilarLumaMin; + + if (FFX_FALSE == bIsRidge) + { + return 0.0f; + } + + FFX_UNROLL + for (FfxInt32 i = 0; i < uNumRejectionMasks; i++) + { + if ((uPatternMask & uRejectionMasks[i]) == uRejectionMasks[i]) + { + return 0.0f; + } + } + + return 1.0f - fLumaMin / fLumaMax; +} + +FfxFloat32 UpdateAccumulation(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector, FfxFloat32 fDisocclusion, FfxFloat32 fShadingChange) /* Returns this frame's accumulation and stores saturate(accumulation + AccumulationAddedPerFrame()) at iPxPos for the next frame. */ +{ + const FfxFloat32x2 fReprojectedUv = fUv + fMotionVector; + FfxFloat32 fAccumulation = 0.0f; + + if (IsUvInside(fReprojectedUv)) { + const FfxFloat32x2 fReprojectedUv_HW = ClampUv(fReprojectedUv, PreviousFrameRenderSize(), MaxRenderSize()); + fAccumulation = ffxSaturate(SampleAccumulation(fReprojectedUv_HW)); + } + const FfxFloat32 fAccumulationAddedPerFrame= AccumulationAddedPerFrame(); //default is 0.333 + + // Assume at frame N+0 fShadingChange is 1.0, and all subsequent frames fShadingChange is 0.0 and fDisocclusion is 0.0. Then, + // frame N+0 fAccumulation will be 0.000 + // frame N+1 fAccumulation will be 0.000 + 0.333 * 1 == 0.333 + // frame N+2 fAccumulation will be 0.000 + 0.333 * 2 == 0.666 + // frame N+3 fAccumulation will be 0.000 + 0.333 * 3 == 0.999 + fAccumulation = ffxLerp(fAccumulation, 0.0f, fShadingChange); + + const FfxFloat32 fMinDisocclusionAccumulation = MinDisocclusionAccumulation(); //default is -0.333 + // Assume at frame N+0 fDisocclusion is 1.0, and all subsequent frames fShadingChange is 0.0 and fDisocclusion is 0.0.
Then, + // frame N+0 fAccumulation will be -0.333f (but normalized to store in unorm) + // frame N+1 fAccumulation will be -0.333f + 0.333 * 1 == 0.000 + // frame N+2 fAccumulation will be -0.333f + 0.333 * 2 == 0.333 + // frame N+3 fAccumulation will be -0.333f + 0.333 * 3 == 0.666 + // frame N+4 fAccumulation will be -0.333f + 0.333 * 4 == 0.999 + fAccumulation = ffxLerp(fAccumulation, ffxMin(fMinDisocclusionAccumulation, fAccumulation), fDisocclusion); + + fAccumulation *= FfxFloat32(round(fAccumulation * 100.0f) > 1.0f); /* Zeroes fAccumulation unless round(fAccumulation * 100) > 1, which includes every negative value. NOTE(review): this seems at odds with the negative values listed in the comment above -- confirm. */ + + // Update for next frame, normalize to store in unorm + const FfxFloat32 fAccumulatedFramesToStore = ffxSaturate(fAccumulation + fAccumulationAddedPerFrame); + StoreAccumulation(iPxPos, fAccumulatedFramesToStore); + + return fAccumulation; +} + +FfxFloat32 ComputeShadingChange(FfxFloat32x2 fUv) +{ + // NOTE: Here we re-apply jitter, will be reverted again when sampled in accumulation pass + const FfxFloat32x2 fShadingChangeUv = ClampUv(fUv - Jitter() / RenderSize(), ShadingChangeRenderSize(), ShadingChangeMaxRenderSize()); + const FfxFloat32 fShadingChange = ffxSaturate(SampleShadingChange(fShadingChangeUv)); + + return fShadingChange; +} + +void PrepareReactivity(FfxInt32x2 iPxPos) /* Per-pixel entry point: writes the four-component dilated reactive mask (REACTIVE, DISOCCLUSION, SHADING_CHANGE, ACCUMULAION) and, when the thin-feature confidence exceeds 1/100, a new lock at the high-resolution position. */ +{ + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / RenderSize(); + const FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos); + + // Discard small mvs -- NOTE(review): nothing is discarded here; f4KVelocity, fDilatedUv and fDepthInMeters below are never read in this function. + const FfxFloat32 f4KVelocity = Get4KVelocity(fMotionVector); + + const FfxFloat32x2 fDilatedUv = fUv + fMotionVector; + const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos); + const FfxFloat32 fDepthInMeters = GetViewSpaceDepthInMeters(fDilatedDepth); + + const FfxFloat32 fDisocclusion = ComputeDisocclusions(fUv, fMotionVector, GetViewSpaceDepth(fDilatedDepth)); + const FfxFloat32 fShadingChange = ffxMax(DilateReactiveMasks(iPxPos, fUv), ComputeShadingChange(fUv)); + + const FfxFloat32 fMotionDivergence = ComputeMotionDivergence(fUv, fMotionVector, fDilatedDepth); + const FfxFloat32
fDilatedTransparencyAndComposition = DilateTransparencyAndCompositionMasks(iPxPos, fUv); + const FfxFloat32 fFinalReactiveness = ffxMax(fMotionDivergence, fDilatedTransparencyAndComposition); + + const FfxFloat32 fAccumulation = UpdateAccumulation(iPxPos, fUv, fMotionVector, fDisocclusion, fShadingChange); + + FfxFloat32x4 fOutput; + fOutput[REACTIVE] = fFinalReactiveness; + fOutput[DISOCCLUSION] = fDisocclusion; + fOutput[SHADING_CHANGE] = fShadingChange; + fOutput[ACCUMULAION] = fAccumulation; + + StoreDilatedReactiveMasks(iPxPos, fOutput); + + const FfxFloat32 fLockStrength = ComputeThinFeatureConfidence(iPxPos); + if (fLockStrength > (1.0f / 100.0f)) + { + StoreNewLocks(ComputeHrPosFromLrPos(FfxInt32x2(iPxPos)), fLockStrength); + } +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h new file mode 100644 index 000000000000..1639815219de --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h @@ -0,0 +1,124 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#define GROUP_SIZE 8 +#define FSR_RCAS_DENOISE 1 + +#include "../ffx_core.h" + +#if FFX_HALF && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) + #define FSR_RCAS_PREFER_PAIRED_VERSION 1 +#else + #define FSR_RCAS_PREFER_PAIRED_VERSION 0 +#endif + +void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) /* Forwards to StoreUpscaledOutput with the position converted to FFX_MIN16_I2. */ +{ + StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); +} + +#if FSR_RCAS_PREFER_PAIRED_VERSION + + #define FSR_RCAS_HX2 1 + FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p) + { + return FfxFloat16x4(LoadRCAS_Input(p)); + } + void FsrRcasInputHx2(inout FfxFloat16x2 r, inout FfxFloat16x2 g, inout FfxFloat16x2 b) /* Multiplies the paired channels by Exposure(). */ + { + FfxFloat32 e = Exposure(); + r = FfxFloat16x2(r * e); + g = FfxFloat16x2(g * e); + b = FfxFloat16x2(b * e); + } + +#else + + #define FSR_RCAS_F 1 + FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) /* Loads the RCAS input and multiplies rgb by Exposure(). */ + { + FfxFloat32x4 fColor = LoadRCAS_Input(p); + + fColor.rgb *= Exposure(); + + return fColor; + } + void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} /* Empty: on this path the exposure is applied in FsrRcasLoadF above. */ + +#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION + +// GODOT BEGINS +// Workaround: the Godot GLSL processor does not support conditional includes. +// Thus we have to take the include statement out of the conditional block.
+ +#include "../fsr1/ffx_fsr1.h" + +#if FSR_RCAS_PREFER_PAIRED_VERSION + + void CurrFilterPaired(FFX_MIN16_U2 pos) /* Filters two pixels 8 apart in x, multiplies both by 1 / Exposure() and writes them. */ + { + FfxFloat16x2 cr; + FfxFloat16x2 cg; + FfxFloat16x2 cb; + FsrRcasHx2(cr, cg, cb, pos, RCASConfig()); + FfxFloat32 InvExposure = 1.0f / Exposure(); + cr = FfxFloat16x2(cr * InvExposure); + cg = FfxFloat16x2(cg * InvExposure); + cb = FfxFloat16x2(cb * InvExposure); + WriteUpscaledOutput(pos, FfxFloat16x3(cr.x, cg.x, cb.x)); //TODO: fix type + pos.x += 8; + WriteUpscaledOutput(pos, FfxFloat16x3(cr.y, cg.y, cb.y)); //TODO: fix type + } + +#else + + void CurrFilter(FFX_MIN16_U2 pos) /* Filters one pixel, divides the result by Exposure() and writes it. */ + { + FfxFloat32x3 c; + FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); + + c /= Exposure(); + + WriteUpscaledOutput(pos, c); + } + +#endif // #if FSR_RCAS_PREFER_PAIRED_VERSION +// GODOT ENDS + +void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) /* Entry point: the paired path makes two calls at y and y + 8; the scalar path makes four calls at offsets (0,0), (8,0), (8,8), (0,8) from gxy. Dtid is not read. */ +{ + // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. + FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); +#if FSR_RCAS_PREFER_PAIRED_VERSION + CurrFilterPaired(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilterPaired(FFX_MIN16_U2(gxy)); +#else + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.y += 8u; + CurrFilter(FFX_MIN16_U2(gxy)); + gxy.x -= 8u; + CurrFilter(FFX_MIN16_U2(gxy)); +#endif +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_reproject.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_reproject.h new file mode 100644 index 000000000000..c72f40fb6fd4 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_reproject.h @@ -0,0 +1,79 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE +#define FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference +#endif + +FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample) /* Thin wrapper over LoadHistory, passed as the fetch function to the sampler declarations below. */ +{ + return LoadHistory(iPxSample); +} + +DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory) +DeclareCustomTextureSample(HistorySample, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples) + +#if FFX_HALF +FFX_MIN16_F4 WrapHistory16(FfxInt32x2 iPxSample) /* Same as WrapHistory, converted to FFX_MIN16_F4. */ +{ + return FFX_MIN16_F4(LoadHistory(iPxSample)); +} + +DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples16, WrapHistory16) +DeclareCustomTextureSampleMin16(HistorySample16, FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(FFX_FSR3UPSCALER_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples16) +#endif + +FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv) /* Low-resolution motion vectors: loads the dilated vector at fHrUv * RenderSize(); otherwise loads the input vector at iPxHrPos. */ +{ +#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); +#else + const FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); +#endif + + return fDilatedMotionVector; +} + +void ComputeReprojectedUVs(FFX_PARAMETER_INOUT AccumulationPassCommonParams params) /* Sets params.fReprojectedHrUv to fHrUv + fMotionVector and params.bIsExistingSample to whether that UV is inside. */ +{ + params.fReprojectedHrUv = params.fHrUv + params.fMotionVector; + + params.bIsExistingSample = IsUvInside(params.fReprojectedHrUv); +} + +void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) /* Samples the history at the reprojected UV, scales rgb by DeltaPreExposure() and Exposure(), converts it to YCoCg and copies w into data.fLock. */ + +{ +#if FFX_HALF && FFX_FSR3UPSCALER_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF + const FfxFloat32x4 fReprojectedHistory = FfxFloat32x4(HistorySample16(params.fReprojectedHrUv, UpscaleSize())); /* NOTE(review): this path passes UpscaleSize() while the path below passes PreviousFrameUpscaleSize() -- confirm the difference is intended. */ +#else + const FfxFloat32x4 fReprojectedHistory = HistorySample(params.fReprojectedHrUv, PreviousFrameUpscaleSize()); +#endif + + data.fHistoryColor = fReprojectedHistory.rgb; + data.fHistoryColor *= DeltaPreExposure(); + data.fHistoryColor *= Exposure(); + +
data.fHistoryColor = RGBToYCoCg(data.fHistoryColor); + + data.fLock = fReprojectedHistory.w; /* The lock value comes from the w component of the sampled history. */ +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h new file mode 100644 index 000000000000..b3d8ddb17aea --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_resources.h @@ -0,0 +1,100 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE.
+ +#ifndef FFX_FSR3UPSCALER_RESOURCES_H +#define FFX_FSR3UPSCALER_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) /* The identifier table below is only emitted when FFX_CPU or FFX_GPU is defined. */ +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY 1 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_COLOR 2 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS 3 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_DEPTH 4 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_EXPOSURE 5 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK 6 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK 7 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH 8 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS 9 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_DEPTH 10 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR 11 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION 12 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_NEW_LOCKS 13 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY 14 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DEBUG_OUTPUT 15 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LANCZOS_LUT 16 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT 17 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT 18 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_RCAS_INPUT 19 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_1 20 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_ACCUMULATION_2 21 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 22 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 23 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY 24 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION 25 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS 26 +#define
FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS 27 // same as FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_0 27 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_1 28 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_2 29 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_3 30 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_4 31 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SPD_MIPS_LEVEL_5 32 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE 33 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FRAME_INFO 34 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOREACTIVE 35 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_AUTOCOMPOSITION_DEPRECATED 36 + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 37 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 38 /* 39 is not assigned in this list. */ + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_1 40 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_2 41 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SHADING_CHANGE 42 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH 43 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_FARTHEST_DEPTH_MIP1 44 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_CURRENT_LUMA 45 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_PREVIOUS_LUMA 46 /* 47 is not assigned in this list. */ +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_LUMA_INSTABILITY 48 +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_INTERMEDIATE_FP16x1 49 + + +// Shading change detection mip level setting, value must be in the range [FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12] +//#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4 +//#define FFX_FSR3UPSCALER_SHADING_CHANGE_MIP_LEVEL (FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE -
FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_SCENE_LUMINANCE) + +#define FFX_FSR3UPSCALER_RESOURCE_IDENTIFIER_COUNT 60 /* Larger than the highest identifier defined above (49). */ + +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_FSR3UPSCALER 0 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_RCAS 2 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE 3 +#define FFX_FSR3UPSCALER_CONSTANTBUFFER_COUNT 4 + +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP 1 /* The four flag values are distinct single bits (1, 2, 4, 8). */ +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP 2 +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD 4 +#define FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX 8 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_FSR3UPSCALER_RESOURCES_H ) diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_sample.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_sample.h new file mode 100644 index 000000000000..7a723d55628b --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_sample.h @@ -0,0 +1,628 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_FSR3UPSCALER_SAMPLE_H +#define FFX_FSR3UPSCALER_SAMPLE_H + +// suppress warnings +#ifdef FFX_HLSL +#pragma warning(disable: 4008) // potentially divide by zero +#endif //FFX_HLSL + +struct FetchedBilinearSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; +}; + +struct FetchedBicubicSamples { + + FfxFloat32x4 fColor00; + FfxFloat32x4 fColor10; + FfxFloat32x4 fColor20; + FfxFloat32x4 fColor30; + + FfxFloat32x4 fColor01; + FfxFloat32x4 fColor11; + FfxFloat32x4 fColor21; + FfxFloat32x4 fColor31; + + FfxFloat32x4 fColor02; + FfxFloat32x4 fColor12; + FfxFloat32x4 fColor22; + FfxFloat32x4 fColor32; + + FfxFloat32x4 fColor03; + FfxFloat32x4 fColor13; + FfxFloat32x4 fColor23; + FfxFloat32x4 fColor33; +}; + +#if FFX_HALF +struct FetchedBilinearSamplesMin16 { + + FFX_MIN16_F4 fColor00; + FFX_MIN16_F4 fColor10; + + FFX_MIN16_F4 fColor01; + FFX_MIN16_F4 fColor11; +}; + +struct FetchedBicubicSamplesMin16 { + + FFX_MIN16_F4 fColor00; + FFX_MIN16_F4 fColor10; + FFX_MIN16_F4 fColor20; + FFX_MIN16_F4 fColor30; + + FFX_MIN16_F4 fColor01; + FFX_MIN16_F4 fColor11; + FFX_MIN16_F4 fColor21; + FFX_MIN16_F4 fColor31; + + FFX_MIN16_F4 fColor02; + FFX_MIN16_F4 fColor12; + FFX_MIN16_F4 fColor22; + FFX_MIN16_F4 fColor32; + + FFX_MIN16_F4 fColor03; + FFX_MIN16_F4 fColor13; + FFX_MIN16_F4 fColor23; + FFX_MIN16_F4 fColor33; +}; +#else //FFX_HALF +#define FetchedBicubicSamplesMin16 FetchedBicubicSamples +#define FetchedBilinearSamplesMin16 FetchedBilinearSamples +#endif //FFX_HALF + +FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t) +{ + return A + (B - A) * t; +} + +FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, 
FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FfxFloat32x4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FfxFloat32x4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t) +{ + return A + (B - A) * t; +} + +FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); + return fColorXY; +} +#endif + +FfxFloat32 Lanczos2NoClamp(FfxFloat32 x) +{ + const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants + return abs(x) < FSR3UPSCALER_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); +} + +FfxFloat32 Lanczos2(FfxFloat32 x) +{ + x = ffxMin(abs(x), 2.0f); + return Lanczos2NoClamp(x); +} + +#if FFX_HALF + +#if 0 +FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x) +{ + const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants + return abs(x) < FFX_MIN16_F(FSR3UPSCALER_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x)); +} +#endif + +FFX_MIN16_F Lanczos2(FFX_MIN16_F x) +{ + x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); + return FFX_MIN16_F(Lanczos2NoClamp(x)); +} +#endif //FFX_HALF + +// FSR1 lanczos approximation. Input is x*x and must be <= 4. 
+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2) +{ + FfxFloat32 a = (2.0f / 5.0f) * x2 - 1; + FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; + return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2) +{ + FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1); + FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); + return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); +} + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F2 PairedLanczos2ApproxSqNoClamp(FFX_MIN16_F2 x2) +{ + // Xbox ATG (Pavel): + // + // 2.0 * x2 - 5.0 25.0 25.0 - 16.0 (2.0 * x2 - 5.0)^2 - (3.0)^2 (2.0 * x2 - 8.0) * (2.0 * x2 - 2.0) (x2 - 4.0) * (x2 - 1.0) + // a = -------------- ==> ---- * a^2 - -------------- = ----------------------------- = ---------------------------------- = ----------------------- = b * (x2 - 1.0) + // 5.0 16.0 16.0 16.0 16.0 4.0 + // + // so we need to compute just (b * b) * (b * x2 - b), so we should get four packed instructions: 2 fma + 2 mul + // + + FFX_MIN16_F2 b = (0.25 * x2 - 1.0); + return (b * b) * (b * x2 - b); +} +#endif + +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2) +{ + x2 = ffxMin(x2, 4.0f); + return Lanczos2ApproxSqNoClamp(x2); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F(4.0f)); + return Lanczos2ApproxSqNoClamp(x2); +} + +#if defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +FFX_MIN16_F2 PairedLanczos2ApproxSq(FFX_MIN16_F2 x2) +{ + x2 = ffxMin(x2, FFX_MIN16_F2(4.0, 4.0)); + return PairedLanczos2ApproxSqNoClamp(x2); +} +#endif +#endif //FFX_HALF + +FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x) +{ + return Lanczos2ApproxSqNoClamp(x * x); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x) +{ + 
return Lanczos2ApproxSqNoClamp(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2Approx(FfxFloat32 x) +{ + return Lanczos2ApproxSq(x * x); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x) +{ + return Lanczos2ApproxSq(x * x); +} +#endif //FFX_HALF + +FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x) +{ + return SampleLanczos2Weight(abs(x)); +} + +#if FFX_HALF +FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x) +{ + return FFX_MIN16_F(SampleLanczos2Weight(abs(x))); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#if FFX_HALF +FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif + +FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FfxFloat32x4 
Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, 
Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + + +FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const 
FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) { + + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif //FFX_HALF + + + 
+FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t) +{ + FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t); + FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t); + FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t); + FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t) +{ + FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t); + FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t); + FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t); + FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t); + return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); +} +#endif //FFX_HALF + +FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac) +{ + FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FfxFloat32x4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + 
Samples.fColor22, + }; + + FfxFloat32x4 fDeringingMin = fDeringingSamples[0]; + FfxFloat32x4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} + +#if FFX_HALF +FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac) +{ + FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x); + FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x); + FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x); + FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x); + FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y); + + // Deringing + + // TODO: only use 4 by checking jitter + const FfxInt32 iDeringingSampleCount = 4; + const FFX_MIN16_F4 fDeringingSamples[4] = { + Samples.fColor11, + Samples.fColor21, + Samples.fColor12, + Samples.fColor22, + }; + + FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0]; + FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0]; + + FFX_UNROLL + for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) + { + fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]); + fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]); + } + + fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax); + + return fColorXY; +} +#endif + +// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant. 
+FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize) +{ + FfxInt32x2 result = iPxSample + iPxOffset; + result.x = ffxMax(1, ffxMin(result.x, iTextureSize.x - 2)); + result.y = ffxMax(1, ffxMin(result.y, iTextureSize.y - 2)); + return result; +} +#if FFX_HALF +FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize) +{ + FFX_MIN16_I2 result = iPxSample + iPxOffset; + result.x = ffxMax(FFX_MIN16_I(1), ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(2))); + result.y = ffxMax(FFX_MIN16_I(1), ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(2))); + return result; +} +#endif //FFX_HALF + + +#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize))); \ + Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize))); \ + Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize))); \ + \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize))); \ + \ + Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize))); \ + Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, 
+1), iTextureSize))); \ + Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize))); \ + \ + Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize))); \ + Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize))); \ + Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize))); \ + Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize))); \ + \ + return Samples; \ + } + +#define DeclareCustomFetchBicubicSamples(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture) \ + SampleType Name(AddrType iPxSample, AddrType iTextureSize) \ + { \ + SampleType Samples; \ + Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize))); \ + Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize))); \ + Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize))); \ + Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize))); \ + return Samples; \ + } + +#define DeclareCustomFetchBilinearSamples(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture) + +#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture) \ + DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture) + +// BE CAREFUL: there is some precision issues and 
(3253, 125) leading to (3252.9989778, 125.001102) +// is common, so iPxSample can "jitter" +#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples) \ + FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + FfxFloat32x2 fPxFrac = ffxFract(fPxSample); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x-1), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y-1), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples) \ + FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize) \ + { \ + FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f); \ + FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample)); \ + /* Clamp base coords */ \ + fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x)); \ + fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y)); \ + /* */ \ + FfxInt32x2 iPxSample = FfxInt32x2(floor(fPxSample)); \ + FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac)); \ + return fColorXY; \ + } + +#define FFX_FSR3UPSCALER_CONCAT_ID(x, y) x ## y +#define FFX_FSR3UPSCALER_CONCAT(x, y) FFX_FSR3UPSCALER_CONCAT_ID(x, y) +#define FFX_FSR3UPSCALER_SAMPLER_1D_0 Lanczos2 +#define FFX_FSR3UPSCALER_SAMPLER_1D_1 Lanczos2LUT +#define FFX_FSR3UPSCALER_SAMPLER_1D_2 Lanczos2Approx + +#define FFX_FSR3UPSCALER_GET_LANCZOS_SAMPLER1D(x) FFX_FSR3UPSCALER_CONCAT(FFX_FSR3UPSCALER_SAMPLER_1D_, x) + +#endif //!defined( FFX_FSR3UPSCALER_SAMPLE_H ) diff --git 
a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change.h new file mode 100644 index 000000000000..2eb23aacdfa7 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change.h @@ -0,0 +1,68 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +FFX_STATIC const FfxInt32 s_MipLevelsToUse = 3; + +struct ShadingChangeLumaInfo +{ + FfxFloat32 fSamples[s_MipLevelsToUse]; +}; + +ShadingChangeLumaInfo ComputeShadingChangeLuma(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, const FfxInt32x2 iCurrentSize) +{ + ShadingChangeLumaInfo info; + + const FfxFloat32x2 fMipUv = ClampUv(fUv, ShadingChangeRenderSize(), GetSPDMipDimensions(0)); + + FFX_UNROLL + for (FfxInt32 iMipLevel = iShadingChangeMipStart; iMipLevel < s_MipLevelsToUse; iMipLevel++) { + + const FfxFloat32x2 fSample = SampleSPDMipLevel(fMipUv, iMipLevel); + + info.fSamples[iMipLevel] = abs(fSample.x * fSample.y); + } + + return info; +} + +void ShadingChange(FfxInt32x2 iPxPos) +{ + if (IsOnScreen(FfxInt32x2(iPxPos), ShadingChangeRenderSize())) { + + const FfxFloat32x2 fUv = (iPxPos + 0.5f) / ShadingChangeRenderSize(); + const FfxFloat32x2 fUvJittered = fUv + Jitter() / RenderSize(); + + const ShadingChangeLumaInfo info = ComputeShadingChangeLuma(iPxPos, fUvJittered, ShadingChangeRenderSize()); + + const FfxFloat32 fScale = 1.0f + iShadingChangeMipStart / s_MipLevelsToUse; + FfxFloat32 fShadingChange = 0.0f; + FFX_UNROLL + for (int iMipLevel = iShadingChangeMipStart; iMipLevel < s_MipLevelsToUse; iMipLevel++) + { + if (info.fSamples[iMipLevel] > 0) { + fShadingChange = ffxMax(fShadingChange, info.fSamples[iMipLevel]) * fScale; + } + } + + StoreShadingChange(iPxPos, ffxSaturate(fShadingChange)); + } +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h new file mode 100644 index 000000000000..190cd5cad3cc --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h @@ -0,0 +1,297 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + SPD_IncreaseAtomicCounter(spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + SPD_ResetAtomicCounter(); +} + +#ifndef SPD_PACKED_ONLY +FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FFX_STATIC const FfxInt32 DIFFERENCE = 0; +FFX_STATIC const FfxInt32 SIGN_SUM = 1; +FFX_STATIC const FfxInt32 MIP0_INDICATOR = 2; + +FfxFloat32x2 Sort2(FfxFloat32x2 v) +{ + return FfxFloat32x2(ffxMin(v.x, v.y), ffxMax(v.x, v.y)); +} + +struct SampleSet +{ + FfxFloat32 fSamples[SHADING_CHANGE_SET_SIZE]; +}; + +#define CompareSwap(i, j) \ +{ \ +FfxFloat32 fTmp = ffxMin(fSet.fSamples[i], fSet.fSamples[j]);\ +fSet.fSamples[j] = ffxMax(fSet.fSamples[i], fSet.fSamples[j]);\ +fSet.fSamples[i] = fTmp;\ +} + +#if SHADING_CHANGE_SET_SIZE == 5 +FFX_STATIC const FfxInt32x2 iSampleOffsets[5] = {FfxInt32x2(+0, +0), FfxInt32x2(-1, +0), FfxInt32x2(+1, +0), FfxInt32x2(+0, -1), FfxInt32x2(+0, +1)}; + +void SortSet(FFX_PARAMETER_INOUT SampleSet fSet) +{ + CompareSwap(0, 3); + CompareSwap(1, 4); + CompareSwap(0, 2); + CompareSwap(1, 3); + CompareSwap(0, 1); + CompareSwap(2, 4); + CompareSwap(1, 2); + CompareSwap(3, 4); + CompareSwap(2, 3); +} +#endif + +FfxFloat32 ComputeMinimumDifference(FfxInt32x2 iPxPos, SampleSet fSet0, SampleSet fSet1) +{ + FfxFloat32 fMinDiff = FSR3UPSCALER_FP16_MAX - 1; + FfxInt32 a = 0; + FfxInt32 b = 0; + + SortSet(fSet0); + SortSet(fSet1); + + const FfxFloat32 fMax = ffxMin(fSet0.fSamples[SHADING_CHANGE_SET_SIZE-1], fSet1.fSamples[SHADING_CHANGE_SET_SIZE-1]); + + if (fMax > FSR3UPSCALER_FP32_MIN) { + + FFX_UNROLL + for (FfxInt32 i = 0; i < SHADING_CHANGE_SET_SIZE && (fMinDiff < FSR3UPSCALER_FP16_MAX); i++) { + + FfxFloat32 fDiff = 
fSet0.fSamples[a] - fSet1.fSamples[b]; + + if (abs(fDiff) > FSR3UPSCALER_FP16_MIN) { + + fDiff = sign(fDiff) * (1.0f - MinDividedByMax(fSet0.fSamples[a], fSet1.fSamples[b])); + + fMinDiff = (abs(fDiff) < abs(fMinDiff)) ? fDiff : fMinDiff; + + a += FfxInt32(fSet0.fSamples[a] < fSet1.fSamples[b]); + b += FfxInt32(fSet0.fSamples[a] >= fSet1.fSamples[b]); + } + else + { + fMinDiff = FSR3UPSCALER_FP16_MAX; + } + } + } + + return fMinDiff * FfxFloat32(fMinDiff < (FSR3UPSCALER_FP16_MAX - 1)); +} + +SampleSet GetCurrentLumaBilinearSamples(FfxFloat32x2 fUv) +{ + const FfxFloat32x2 fUvJittered = fUv + Jitter() / RenderSize(); + const FfxInt32x2 iBasePos = FfxInt32x2(floor(fUvJittered * RenderSize())); + + SampleSet fSet; + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < SHADING_CHANGE_SET_SIZE; iSampleIndex++) { + const FfxInt32x2 iSamplePos = ClampLoad(iBasePos, iSampleOffsets[iSampleIndex], RenderSize()); + fSet.fSamples[iSampleIndex] = LoadCurrentLuma(iSamplePos) * Exposure(); + fSet.fSamples[iSampleIndex] = ffxPow(fSet.fSamples[iSampleIndex], fShadingChangeSamplePow); + fSet.fSamples[iSampleIndex] = ffxMax(fSet.fSamples[iSampleIndex], FSR3UPSCALER_EPSILON); + } + + return fSet; +} + +struct PreviousLumaBilinearSamplesData +{ + SampleSet fSet; + FfxBoolean bIsExistingSample; +}; + +PreviousLumaBilinearSamplesData GetPreviousLumaBilinearSamples(FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector) +{ + PreviousLumaBilinearSamplesData data; + + const FfxFloat32x2 fUvJittered = fUv + PreviousFrameJitter() / PreviousFrameRenderSize(); + const FfxFloat32x2 fReprojectedUv = fUvJittered + fMotionVector; + + data.bIsExistingSample = IsUvInside(fReprojectedUv); + + if (data.bIsExistingSample) { + + const FfxInt32x2 iBasePos = FfxInt32x2(floor(fReprojectedUv * PreviousFrameRenderSize())); + + for (FfxInt32 iSampleIndex = 0; iSampleIndex < SHADING_CHANGE_SET_SIZE; iSampleIndex++) { + + const FfxInt32x2 iSamplePos = ClampLoad(iBasePos, iSampleOffsets[iSampleIndex], 
PreviousFrameRenderSize()); + data.fSet.fSamples[iSampleIndex] = LoadPreviousLuma(iSamplePos) * DeltaPreExposure() * Exposure(); + data.fSet.fSamples[iSampleIndex] = ffxPow(data.fSet.fSamples[iSampleIndex], fShadingChangeSamplePow); + data.fSet.fSamples[iSampleIndex] = ffxMax(data.fSet.fSamples[iSampleIndex], FSR3UPSCALER_EPSILON); + } + } + + return data; +} + +FfxFloat32 ComputeDiff(FfxInt32x2 iPxPos, FfxFloat32x2 fUv, FfxFloat32x2 fMotionVector) +{ + FfxFloat32 fMinDiff = 0.0f; + + const SampleSet fCurrentSamples = GetCurrentLumaBilinearSamples(fUv); + const PreviousLumaBilinearSamplesData previousData = GetPreviousLumaBilinearSamples(fUv, fMotionVector); + + if (previousData.bIsExistingSample) { + fMinDiff = ComputeMinimumDifference(iPxPos, fCurrentSamples, previousData.fSet); + } + + return fMinDiff; +} + +FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 iPxPos, FfxUInt32 slice) +{ + const FfxInt32x2 iPxSamplePos = ClampLoad(FfxInt32x2(iPxPos), FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + const FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(iPxSamplePos); + const FfxFloat32x2 fUv = (iPxSamplePos + 0.5f) / RenderSize(); + + const FfxFloat32 fScaledAndSignedLumaDiff = ComputeDiff(iPxSamplePos, fUv, fDilatedMotionVector); + + FfxFloat32x4 fOutput = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); + fOutput[DIFFERENCE] = fScaledAndSignedLumaDiff; + fOutput[SIGN_SUM] = (fScaledAndSignedLumaDiff != 0.0f) ? 
sign(fScaledAndSignedLumaDiff) : 0.0f; + fOutput[MIP0_INDICATOR] = 1.0f; + + return fOutput; +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return FfxFloat32x4(RWLoadPyramid(tex, 5), 0, 0); +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ + return (v0 + v1 + v2 + v3) * 0.25f; +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) +{ + if (index >= iShadingChangeMipStart) + { + StorePyramid(pix, outValue.xy, index); + } +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4( + spdIntermediateR[x][y], + spdIntermediateG[x][y], + spdIntermediateB[x][y], + spdIntermediateA[x][y]); +} +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} + +#endif + +// define fetch and store functions Packed +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return FfxFloat16x4(0, 0, 0, 0); +} + +void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +} +#endif + +#include "../spd/ffx_spd.h" + 
+void ComputeShadingChangePyramid(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex) +{ +#if FFX_HALF + SpdDownsampleH( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#else + SpdDownsample( + FfxUInt32x2(WorkGroupId.xy), + FfxUInt32(LocalThreadIndex), + FfxUInt32(MipCount()), + FfxUInt32(NumWorkGroups()), + FfxUInt32(WorkGroupId.z), + FfxUInt32x2(WorkGroupOffset())); +#endif +} diff --git a/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_upsample.h b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_upsample.h new file mode 100644 index 000000000000..801a0a907e00 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/fsr3upscaler/ffx_fsr3upscaler_upsample.h @@ -0,0 +1,644 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) +{ + fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); +} + +#ifndef FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE +#define FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate +#endif + +FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) +{ + FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} + +#if FFX_HALF +FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) +{ + FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; +#if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE + FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT + FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); +#elif FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + FFX_MIN16_F fSampleWeight = 
Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); + + // To Test: Save reciprocal sqrt compute + // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); +#else +#error "Invalid Lanczos type" +#endif + return fSampleWeight; +} +#endif + +FfxFloat32 ComputeMaxKernelWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) { + + const FfxFloat32 fKernelSizeBias = 1.0f + (1.0f / FfxFloat32x2(DownscaleFactor()) - 1.0f).x; + + return ffxMin(FfxFloat32(1.99f), fKernelSizeBias); +} + +FfxFloat32x3 LoadPreparedColor(FfxInt32x2 iSamplePos) +{ + const FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iSamplePos)) * Exposure(); + const FfxFloat32x3 fPreparedYCoCg = RGBToYCoCg(fRgb); + + return fPreparedYCoCg; +} + +#if FFX_HALF && (FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2) && defined(__XBOX_SCARLETT) && defined(__XBATG_EXTRA_16_BIT_OPTIMISATION) && (__XBATG_EXTRA_16_BIT_OPTIMISATION == 1) +#define FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 1 +#else +#define FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS 0 +#endif + +#if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + +void LoadPreparedColorPairedRgb(FFX_PARAMETER_OUT FFX_MIN16_F2 r, + FFX_PARAMETER_OUT FFX_MIN16_F2 g, + FFX_PARAMETER_OUT FFX_MIN16_F2 b, + FfxInt32x2 iSamplePos0, + FfxInt32x2 iSamplePos1) +{ + const FFX_MIN16_F3 sample0 = FFX_MIN16_F3(LoadInputColor(iSamplePos0)); + const FFX_MIN16_F3 sample1 = FFX_MIN16_F3(LoadInputColor(iSamplePos1)); + + r = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(sample0.r, sample1.r)); + g = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(sample0.g, sample1.g)); + b = ffxMax(FFX_MIN16_F2(0, 0), FFX_MIN16_F2(sample0.b, sample1.b)); + + r = FFX_MIN16_F2(r * Exposure()); + g = FFX_MIN16_F2(g * Exposure()); + b = FFX_MIN16_F2(b * Exposure()); +} + +void TonemapPaired(FFX_PARAMETER_INOUT FFX_MIN16_F2 r, FFX_PARAMETER_INOUT 
FFX_MIN16_F2 g, FFX_PARAMETER_INOUT FFX_MIN16_F2 b) +{ + FFX_MIN16_F2 denomF16 = ffxMax(ffxMax(ffxMax(0.0, r), g), b) + FFX_MIN16_F2(1.0, 1.0); + + // NOTE: expect 2 x v_cvt_f32_f16 + FfxFloat32x2 denomF32 = FfxFloat32x2(denomF16); + // NOTE: expect 2 x v_rcp_f32 + FfxFloat32x2 normF32 = FfxFloat32x2(1.0, 1.0) / denomF32; + // NOTE: expect 2 x v_cvt_f16_f32 + FFX_MIN16_F2 normF16 = FFX_MIN16_F2(normF32); + + r *= normF16; + g *= normF16; + b *= normF16; +} + +void RGBToYCoCgPaired(FFX_PARAMETER_INOUT FFX_MIN16_F2 r, FFX_PARAMETER_INOUT FFX_MIN16_F2 g, FFX_PARAMETER_INOUT FFX_MIN16_F2 b) +{ + /** + * NOTE: given the following conversion + * + * fYCoCg = FfxFloat32x3( + * 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b, + * 0.5f * fRgb.r - 0.5f * fRgb.b, + * -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b); + * + * it's possible to notice that we can compute: + * RplusBdiv4 = 0.25 * (R + B) + * + * so everything else is computed in 3 instructions + * Y = G * 0.5 + RplusBdiv4 + * Co = 2 * RplusBdiv4 - B + * Cg = G * 0.5 - RplusBdiv4 + */ + + // NOTE: expect v_pk_add_f32 + v_pk_mul_f32 + FFX_MIN16_F2 RplusBdiv4 = (r + b) * 0.25; + FFX_MIN16_F2 G = g; + FFX_MIN16_F2 B = b; + + // NOTE: expect 3x v_pk_fma_f32 + r = G * 0.5 + RplusBdiv4; + g = RplusBdiv4 * 2.0 - B; + b = G * 0.5 - RplusBdiv4; +} + +FFX_MIN16_F2 Compute3x3SamplesMinMaxPaired(FFX_PARAMETER_IN FFX_MIN16_F2 sampleCenter, + FFX_PARAMETER_IN FFX_MIN16_F2 sample0, + FFX_PARAMETER_IN FFX_MIN16_F2 sample1, + FFX_PARAMETER_IN FFX_MIN16_F2 sample2, + FFX_PARAMETER_IN FFX_MIN16_F2 sample3) +{ + FFX_MIN16_F2 twoMinValues = ffxMin(ffxMin(sample0, sample1), ffxMin(sample2, sample3)); + FFX_MIN16_F2 twoMaxValues = ffxMax(ffxMax(sample0, sample1), ffxMax(sample2, sample3)); + + return FFX_MIN16_F2( + ffxMin3Half(twoMinValues.x, twoMinValues.y, sampleCenter.x), + ffxMax3Half(twoMaxValues.x, twoMaxValues.y, sampleCenter.x) + ); +} + + +FFX_MIN16_F2 Bool2ToFloat16x2(bool x, bool y) +{ + uint lo = x ? 
0x00003c00 : 0x00000000; + uint hi = y ? 0x3c000000 : 0x00000000; + return FFX_MIN16_F2(__XB_AsHalf(lo).x, __XB_AsHalf(hi).y); +} + +struct PairedRectificationBoxAndAccumulatedColorAndWeight +{ + FFX_MIN16_F2 boxCenterR; + FFX_MIN16_F2 boxCenterG; + FFX_MIN16_F2 boxCenterB; + + FFX_MIN16_F2 boxVecR; + FFX_MIN16_F2 boxVecG; + FFX_MIN16_F2 boxVecB; + + FFX_MIN16_F2 fBoxCenterWeight; + + FFX_MIN16_F2 fColorR; + FFX_MIN16_F2 fColorG; + FFX_MIN16_F2 fColorB; + FFX_MIN16_F2 fWeight; + + FFX_MIN16_F fKernelBiasSq; + FFX_MIN16_F fRectificationCurveBias; + + void setKernelBiasAndRectificationCurveBias(FfxFloat32 kernelBias, FfxFloat32 rectificationCurveBias) + { + fKernelBiasSq = FFX_MIN16_F(kernelBias * kernelBias); + fRectificationCurveBias = FFX_MIN16_F(rectificationCurveBias); + } + + void initUpscaledColor(FFX_MIN16_F fSrcSampleOffsetSq, FFX_MIN16_F fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = FFX_MIN16_F2( + PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq).x, + 0.0 + ); + #else + #error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" + #endif + const FFX_MIN16_F2 fSampleWeight = fOnScreenWeight * LanczosUpsampleWeight; + + fColorR = sampleR * fSampleWeight; + fColorG = sampleG * fSampleWeight; + fColorB = sampleB * fSampleWeight; + fWeight = fSampleWeight; + } + + void initBox(FFX_MIN16_F fSrcSampleOffsetSq, FFX_MIN16_F fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + const FFX_MIN16_F2 fBoxSampleWeight = FFX_MIN16_F2( + exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenWeight, + 0.0 + ); + + FFX_MIN16_F2 weightedSampleR = sampleR * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleG = sampleG * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleB = sampleB * fBoxSampleWeight; + + boxCenterR = weightedSampleR; + 
boxCenterG = weightedSampleG; + boxCenterB = weightedSampleB; + + boxVecR = sampleR * weightedSampleR; + boxVecG = sampleG * weightedSampleG; + boxVecB = sampleB * weightedSampleB; + + fBoxCenterWeight = fBoxSampleWeight; + } + + void addUpscaledColorSample(FFX_MIN16_F2 fSrcSampleOffsetSq, FFX_MIN16_F2 fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + #if FFX_FSR3UPSCALER_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE + const FFX_MIN16_F2 LanczosUpsampleWeight = PairedLanczos2ApproxSq(fSrcSampleOffsetSq * fKernelBiasSq); + #else + #error "Only LANCZOS_TYPE_APPROXIMATE is supported in paired version so far" + #endif + const FFX_MIN16_F2 fSampleWeight = fOnScreenWeight * LanczosUpsampleWeight; + + fColorR += sampleR * fSampleWeight; + fColorG += sampleG * fSampleWeight; + fColorB += sampleB * fSampleWeight; + fWeight += fSampleWeight; + } + + void addBoxSample(FFX_MIN16_F2 fSrcSampleOffsetSq, FFX_MIN16_F2 fOnScreenWeight, FFX_MIN16_F2 sampleR, FFX_MIN16_F2 sampleG, FFX_MIN16_F2 sampleB) + { + // NOTE: ideally expect here 2x v_fma_mix + 2x v_exp_f32 + 2x v_fma_mix + const FFX_MIN16_F2 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenWeight; + + FFX_MIN16_F2 weightedSampleR = sampleR * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleG = sampleG * fBoxSampleWeight; + FFX_MIN16_F2 weightedSampleB = sampleB * fBoxSampleWeight; + + boxCenterR += weightedSampleR; + boxCenterG += weightedSampleG; + boxCenterB += weightedSampleB; + + boxVecR += sampleR * weightedSampleR; + boxVecG += sampleG * weightedSampleG; + boxVecB += sampleB * weightedSampleB; + + fBoxCenterWeight += fBoxSampleWeight; + } + + void finalizeUpscaledColor(FFX_PARAMETER_OUT FfxFloat32x4 upscaledColorAndWeight) + { + upscaledColorAndWeight.r = fColorR.x + fColorR.y; + upscaledColorAndWeight.g = fColorG.x + fColorG.y; + upscaledColorAndWeight.b = fColorB.x + fColorB.y; + + upscaledColorAndWeight.a = fWeight.x + fWeight.y; 
+ } + + void finalizeBox(FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecR, + FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecG, + FFX_PARAMETER_OUT FfxFloat32x2 boxCenterAndVecB, + FFX_PARAMETER_OUT FfxFloat32 boxCenterWeight) + { + boxCenterAndVecR = FfxFloat32x2(boxCenterR.x + boxCenterR.y, boxVecR.x + boxVecR.y); + boxCenterAndVecG = FfxFloat32x2(boxCenterG.x + boxCenterG.y, boxVecG.x + boxVecG.y); + boxCenterAndVecB = FfxFloat32x2(boxCenterB.x + boxCenterB.y, boxVecB.x + boxVecB.y); + + boxCenterWeight = fBoxCenterWeight.x + fBoxCenterWeight.y; + } +}; +#endif // #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + +void ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT AccumulationPassData data) +{ + // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally) + const FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); + const FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); + const FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); + const FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 + const FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); + + FfxInt32x2 offsetTL; + offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); + offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1); + + //Load samples + // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. + // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. + // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which applies to fewer values. 
+ const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; + const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; + const FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); + + const FfxBoolean bIsInitialSample = (params.fAccumulation == 0.0f); + +#if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const FFX_MIN16_F2 fSampleOffsetX02 = __XB_AsHalf(bFlipCol ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + const FFX_MIN16_F2 fSampleOffsetY02 = __XB_AsHalf(bFlipRow ? __XB_AsUInt(FFX_MIN16_F2( 1, -1)) : __XB_AsUInt(FFX_MIN16_F2(-1, 1))); + + typedef FfxInt32 FfxTexCoordI; + typedef FfxInt32x2 FfxTexCoordI2; + + const FfxTexCoordI2 iSrcSamplePosX01 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosX23 = FfxTexCoordI2(iSrcInputPos.xx) + (bFlipCol ? FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 iSrcSamplePosY01 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? FfxTexCoordI2( 1, 0) : FfxTexCoordI2(-1, 0)); + const FfxTexCoordI2 iSrcSamplePosY23 = FfxTexCoordI2(iSrcInputPos.yy) + (bFlipRow ? 
FfxTexCoordI2(-1, -2) : FfxTexCoordI2( 1, 2)); + + const FfxTexCoordI2 renderSizeLastTexelCoord = FfxTexCoordI2(RenderSize()) - FfxTexCoordI2(1, 1); + + const FfxTexCoordI2 iSrcSamplePosX01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX01.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX01.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosX23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosX23.x, 0, renderSizeLastTexelCoord.x), + __XB_Med3_I32(iSrcSamplePosX23.y, 0, renderSizeLastTexelCoord.x) + ); + + const FfxTexCoordI2 iSrcSamplePosY01Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY01.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY01.y, 0, renderSizeLastTexelCoord.y) + ); + + const FfxTexCoordI2 iSrcSamplePosY23Clamped = FfxTexCoordI2( + __XB_Med3_I32(iSrcSamplePosY23.x, 0, renderSizeLastTexelCoord.y), + __XB_Med3_I32(iSrcSamplePosY23.y, 0, renderSizeLastTexelCoord.y) + ); + + FFX_MIN16_F2 TopCornerR, BotCornerR, HorzR, VertR, CenterR; + FFX_MIN16_F2 TopCornerG, BotCornerG, HorzG, VertG, CenterG; + FFX_MIN16_F2 TopCornerB, BotCornerB, HorzB, VertB, CenterB; + + LoadPreparedColorPairedRgb(TopCornerR, TopCornerG, TopCornerB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.x), + FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.x) + ); + + LoadPreparedColorPairedRgb(BotCornerR, BotCornerG, BotCornerB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY23Clamped.x), + FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY23Clamped.x) + ); + + LoadPreparedColorPairedRgb(HorzR, HorzG, HorzB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.x, iSrcSamplePosY01Clamped.y), + FfxTexCoordI2(iSrcSamplePosX23Clamped.x, iSrcSamplePosY01Clamped.y) + ); + + LoadPreparedColorPairedRgb(VertR, VertG, VertB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.x), + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY23Clamped.x) + ); + + // NOTE: 
duplicated data + LoadPreparedColorPairedRgb(CenterR, CenterG, CenterB, + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y), + FfxTexCoordI2(iSrcSamplePosX01Clamped.y, iSrcSamplePosY01Clamped.y) + ); + + #if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + if (bIsInitialSample) + { + TonemapPaired(TopCornerR, TopCornerG, TopCornerB); + TonemapPaired(BotCornerR, BotCornerG, BotCornerB); + TonemapPaired(HorzR, HorzG, HorzB); + TonemapPaired(VertR, VertG, VertB); + TonemapPaired(CenterR, CenterG, CenterB); + } + #endif + + RGBToYCoCgPaired(TopCornerR, TopCornerG, TopCornerB); + RGBToYCoCgPaired(BotCornerR, BotCornerG, BotCornerB); + RGBToYCoCgPaired(HorzR, HorzG, HorzB); + RGBToYCoCgPaired(VertR, VertG, VertB); + RGBToYCoCgPaired(CenterR, CenterG, CenterB); + +#else + FfxFloat32x3 fSamples[9]; + FfxInt32 iSampleIndex = 0; + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) { + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) { + const FfxInt32x2 iSampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? 
(3 - row) : row); + const FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + iSampleColRow; + const FfxInt32x2 iSampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); + + fSamples[iSampleIndex] = LoadPreparedColor(iSampleCoord); + + ++iSampleIndex; + } + } + +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + if (bIsInitialSample) + { + for (iSampleIndex = 0; iSampleIndex < 9; ++iSampleIndex) + { + //YCoCg -> RGB -> Tonemap -> YCoCg (Use RGB tonemapper to avoid color desaturation) + fSamples[iSampleIndex] = RGBToYCoCg(Tonemap(YCoCgToRGB(fSamples[iSampleIndex]))); + } + } +#endif + +#endif // #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + + // Identify how much of each upsampled color to be used for this frame + const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight(params, data); + const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); + + const FfxFloat32 fKernelBiasWeight = + ffxMin(1.0f - params.fDisocclusion * 0.5f, + ffxMin(1.0f - params.fShadingChange, + ffxSaturate(data.fHistoryWeight * 5.0f) + )); + + const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMin, fKernelBiasMax, fKernelBiasWeight); + +#if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + // Unroll the loop to load samples on Scarlett to help the shader compiler + const bool coordX0OnScreen = iSrcSamplePosX01.x == iSrcSamplePosX01Clamped.x; + const bool coordX1OnScreen = iSrcSamplePosX01.y == iSrcSamplePosX01Clamped.y; + const bool coordX2OnScreen = iSrcSamplePosX23.x == iSrcSamplePosX23Clamped.x; + + const bool coordY0OnScreen = iSrcSamplePosY01.x == iSrcSamplePosY01Clamped.x; + const bool coordY1OnScreen = iSrcSamplePosY01.y == iSrcSamplePosY01Clamped.y; + const bool coordY2OnScreen = iSrcSamplePosY23.x == iSrcSamplePosY23Clamped.x; + + const FFX_MIN16_F2 fBaseSampleOffsetHalf = FFX_MIN16_F2(fBaseSampleOffset); + + const FFX_MIN16_F2 fSrcSampleOffsetX_02 = fBaseSampleOffsetHalf.xx + fSampleOffsetX02; + 
const FFX_MIN16_F2 fSrcSampleOffsetY_02 = fBaseSampleOffsetHalf.yy + fSampleOffsetY02; + + const FFX_MIN16_F2 fSrcSampleOffsetXSq_02 = fSrcSampleOffsetX_02 * fSrcSampleOffsetX_02; + const FFX_MIN16_F2 fSrcSampleOffsetYSq_02 = fSrcSampleOffsetY_02 * fSrcSampleOffsetY_02; + const FFX_MIN16_F2 fSrcSampleOffsetXYSq_11 = fBaseSampleOffsetHalf * fBaseSampleOffsetHalf; + + const FfxFloat32 fRectificationCurveBias = -2.3f; + PairedRectificationBoxAndAccumulatedColorAndWeight pairedBox; + pairedBox.setKernelBiasAndRectificationCurveBias(fKernelBias, fRectificationCurveBias); + + // init by o o o + // o x o + // o o o + pairedBox.initBox( + fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y, + Bool2ToFloat16x2(coordX1OnScreen && coordY1OnScreen, false).x, + CenterR, CenterG, CenterB + ); + + // add remaining two samples from 1st row x o x + // o * o + // o o o + pairedBox.addBoxSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx, + Bool2ToFloat16x2(coordX0OnScreen && coordY0OnScreen, coordX2OnScreen && coordY0OnScreen), + TopCornerR, TopCornerG, TopCornerB + ); + + // add two samples from 2nd row * o * + // o * o + // x o x + pairedBox.addBoxSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY2OnScreen, coordX2OnScreen && coordY2OnScreen), + BotCornerR, BotCornerG, BotCornerB + ); + + // add two samples from 3rd row * o * + // x * x + // * o * + pairedBox.addBoxSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY1OnScreen, coordX2OnScreen && coordY1OnScreen), + HorzR, HorzG, HorzB + ); + + // add remaining samples * x * + // * * * + // * x * + pairedBox.addBoxSample( + fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02, + Bool2ToFloat16x2(coordX1OnScreen && coordY0OnScreen, coordX1OnScreen && coordY2OnScreen), + VertR, VertG, VertB + ); + + FfxFloat32x2 boxCenterAndVecR, boxCenterAndVecG, boxCenterAndVecB; + FfxFloat32 boxCenterWeight; + 
pairedBox.finalizeBox(boxCenterAndVecR, boxCenterAndVecG, boxCenterAndVecB, boxCenterWeight); + + if (!bIsInitialSample) + { + pairedBox.initUpscaledColor( + fSrcSampleOffsetXYSq_11.x + fSrcSampleOffsetXYSq_11.y, + Bool2ToFloat16x2(coordX1OnScreen && coordY1OnScreen, false).x, + CenterR, CenterG, CenterB + ); + + // add remaining two samples from 1st row x o x + // o * o + // o o o + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.xx, + Bool2ToFloat16x2(coordX0OnScreen && coordY0OnScreen, coordX2OnScreen && coordY0OnScreen), + TopCornerR, TopCornerG, TopCornerB + ); + + // add two samples from 2nd row * o * + // o * o + // x o x + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetYSq_02.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY2OnScreen, coordX2OnScreen && coordY2OnScreen), + BotCornerR, BotCornerG, BotCornerB + ); + + // add two samples from 3rd row * o * + // x * x + // * o * + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXSq_02 + fSrcSampleOffsetXYSq_11.yy, + Bool2ToFloat16x2(coordX0OnScreen && coordY1OnScreen, coordX2OnScreen && coordY1OnScreen), + HorzR, HorzG, HorzB + ); + + // add remaining samples * x * + // * * * + // * x * + pairedBox.addUpscaledColorSample( + fSrcSampleOffsetXYSq_11.xx + fSrcSampleOffsetYSq_02, + Bool2ToFloat16x2(coordX1OnScreen && coordY0OnScreen, coordX1OnScreen && coordY2OnScreen), + VertR, VertG, VertB + ); + + FfxFloat32x4 upscaledColorAndWeight = 0.0; + pairedBox.finalizeUpscaledColor(upscaledColorAndWeight); + + data.fUpsampledColor = FfxFloat32x3(upscaledColorAndWeight.rgb); + data.fUpsampledWeight = FfxFloat32(upscaledColorAndWeight.w); + } + + FFX_MIN16_F2 aabbMinMaxR = Compute3x3SamplesMinMaxPaired(CenterR, TopCornerR, BotCornerR, HorzR, VertR); + FFX_MIN16_F2 aabbMinMaxG = Compute3x3SamplesMinMaxPaired(CenterG, TopCornerG, BotCornerG, HorzG, VertG); + FFX_MIN16_F2 aabbMinMaxB = Compute3x3SamplesMinMaxPaired(CenterB, TopCornerB, BotCornerB, HorzB, 
VertB); + + data.clippingBox.boxCenter = FfxFloat32x3(boxCenterAndVecR.x, boxCenterAndVecG.x, boxCenterAndVecB.x); + data.clippingBox.boxVec = FfxFloat32x3(boxCenterAndVecR.y, boxCenterAndVecG.y, boxCenterAndVecB.y); + data.clippingBox.aabbMin = FfxFloat32x3(aabbMinMaxR.x, aabbMinMaxG.x, aabbMinMaxB.x); + data.clippingBox.aabbMax = FfxFloat32x3(aabbMinMaxR.y, aabbMinMaxG.y, aabbMinMaxB.y); + data.clippingBox.fBoxCenterWeight = FfxFloat32(boxCenterWeight); +#else + + iSampleIndex = 0; + + FFX_UNROLL + for (FfxInt32 row = 0; row < 3; row++) + { + FFX_UNROLL + for (FfxInt32 col = 0; col < 3; col++) + { + const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); + const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); + const FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; + + const FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; + const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); + + if (!bIsInitialSample) + { + const FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); + + data.fUpsampledColor += fSamples[iSampleIndex] * fSampleWeight; + data.fUpsampledWeight += fSampleWeight; + } + + // Update rectification box + { + const FfxFloat32 fRectificationCurveBias = -2.3f; + const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); + const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq) * fOnScreenFactor; + + const FfxBoolean bInitialSample = (row == 0) && (col == 0); + RectificationBoxAddSample(bInitialSample, data.clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); + } + ++iSampleIndex; + } + } + +#endif // #if FFX_FSR3UPSCALER_USE_XBOX_PAIRED_16BIT_MATH_OPTIMIZATIONS + + RectificationBoxComputeVarianceBoxData(data.clippingBox); + + data.fUpsampledWeight *= 
FfxFloat32(data.fUpsampledWeight > FSR3UPSCALER_EPSILON); + + if (data.fUpsampledWeight > FSR3UPSCALER_EPSILON) { + // Normalize for deringing (we need to compare colors) + data.fUpsampledColor = data.fUpsampledColor / data.fUpsampledWeight; + data.fUpsampledWeight *= fAverageLanczosWeightPerFrame; + + Deringing(data.clippingBox, data.fUpsampledColor); + } + + // Initial samples using tonemapped upsampling + if (bIsInitialSample) { +#if FFX_FSR3UPSCALER_OPTION_HDR_COLOR_INPUT + data.fUpsampledColor = RGBToYCoCg(InverseTonemap(YCoCgToRGB(data.clippingBox.boxCenter))); +#else + data.fUpsampledColor = data.clippingBox.boxCenter; +#endif + data.fUpsampledWeight = 1.0f; + data.fHistoryWeight = 0.0f; + } +} diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_callbacks_glsl.h new file mode 100644 index 000000000000..69f21ead9daa --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_callbacks_glsl.h @@ -0,0 +1,578 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_OPTICALFLOW_CALLBACKS_HLSL_H // NOTE(review): guard is spelled *_HLSL_H although this is the GLSL callbacks header, and ffx_opticalflow_callbacks_hlsl.h uses the same macro - confirm the two headers are never included together +#define FFX_OPTICALFLOW_CALLBACKS_HLSL_H + +#include "ffx_opticalflow_resources.h" + +// no msad4 in glsl +#define FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION 0 +#define FFX_OPTICALFLOW_FIX_TOP_LEFT_BIAS 1 +#define FFX_OPTICALFLOW_USE_HEURISTICS 1 +#define FFX_OPTICALFLOW_BLOCK_SIZE 8 +#define FFX_LOCAL_SEARCH_FALLBACK 1 + +// perf optimization for h/w not supporting accelerated msad4() +//#if !defined(FFX_PREFER_WAVE64) && defined(FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION) +//#undef FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION +//#endif + +#if defined(FFX_GPU) +#include "ffx_core.h" +#endif // #if defined(FFX_GPU) + +#if defined(FFX_GPU) // closed by the `#endif // #if defined(FFX_GPU)` at the end of this header +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 + + +#include "opticalflow/ffx_opticalflow_common.h" + + +#if defined(FFX_OPTICALFLOW_BIND_CB_COMMON) + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_CB_COMMON, std140) uniform cbOF_t + { + FfxInt32x2 iInputLumaResolution; + FfxUInt32 uOpticalFlowPyramidLevel; + FfxUInt32 uOpticalFlowPyramidLevelCount; + + FfxUInt32 iFrameIndex; + FfxUInt32 backbufferTransferFunction; + FfxFloat32x2 minMaxLuminance; + } cbOF; + +FfxInt32x2 DisplaySize() // despite the name, this returns the input luma resolution stored in cbOF +{ + return cbOF.iInputLumaResolution; +} + +FfxUInt32 FrameIndex() +{ + return cbOF.iFrameIndex; +} + +FfxUInt32 BackbufferTransferFunction() +{ + return cbOF.backbufferTransferFunction; +} + +FfxFloat32x2 MinMaxLuminance() +{ + return cbOF.minMaxLuminance; +} + +FfxBoolean CrossedSceneChangeThreshold(FfxFloat32 sceneChangeValue) // true only when the value is strictly greater than the hard-coded 0.45 threshold +{ + return sceneChangeValue > 0.45f; +} + +FfxUInt32 OpticalFlowPyramidLevel() +{ + return cbOF.uOpticalFlowPyramidLevel; +} + +FfxUInt32 OpticalFlowPyramidLevelCount() +{ + return
cbOF.uOpticalFlowPyramidLevelCount; +} + +FfxInt32x2 OpticalFlowHistogramMaxVelocity() +{ + const FfxInt32 searchRadius = 8; + FfxInt32 scale = FfxInt32(1) << (OpticalFlowPyramidLevelCount() - 1 - OpticalFlowPyramidLevel()); + FfxInt32 maxVelocity = searchRadius * scale; + return FfxInt32x2(maxVelocity, maxVelocity); +} +#endif //FFX_OPTICALFLOW_BIND_CB_COMMON + +#if defined(FFX_OPTICALFLOW_BIND_CB_SPD) + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_CB_SPD, std140) uniform cbOF_SPD_t + { + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxUInt32 numWorkGroupOpticalFlowInputPyramid; + FfxUInt32 pad0_; + FfxUInt32 pad1_; + FfxUInt32 pad2_; + } cbOF_SPD; + +uint NumWorkGroups() +{ + return cbOF_SPD.numWorkGroupOpticalFlowInputPyramid; +} +#endif // defined(FFX_OPTICALFLOW_BIND_CB_SPD) + + #if defined FFX_OPTICALFLOW_BIND_SRV_INPUT_COLOR + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_INPUT_COLOR) uniform texture2D r_input_color; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_INPUT_MOTION_VECTORS + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_INPUT_MOTION_VECTORS) uniform texture2D r_input_motion_vectors; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT) uniform utexture2D r_optical_flow_input; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT) uniform utexture2D r_optical_flow_previous_input; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW) uniform itexture2D r_optical_flow; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS) uniform itexture2D r_optical_flow_previous; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO + layout 
(set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO) uniform utexture2D r_optical_flow_additional_info; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO_PREVIOUS + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO_PREVIOUS) uniform utexture2D r_optical_flow_additional_info_previous; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_HISTOGRAM + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_HISTOGRAM) uniform utexture2D r_optical_flow_histogram; + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH) uniform utexture2D r_optical_flow_global_motion_search; + #endif + + // UAV declarations + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT, r8ui) uniform uimage2D rw_optical_flow_input; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1 + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1, r8ui) uniform uimage2D rw_optical_flow_input_level_1; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2 + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2, r8ui) uniform uimage2D rw_optical_flow_input_level_2; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3 + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3, r8ui) uniform uimage2D rw_optical_flow_input_level_3; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4 + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4, r8ui) uniform uimage2D rw_optical_flow_input_level_4; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5 + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5, 
r8ui) uniform uimage2D rw_optical_flow_input_level_5; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6 + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6, r8ui) uniform uimage2D rw_optical_flow_input_level_6; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW, rg16i) uniform iimage2D rw_optical_flow; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_NEXT_LEVEL + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_NEXT_LEVEL, rg16i) uniform iimage2D rw_optical_flow_next_level; + #endif + //#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO + // RWTexture2D rw_optical_flow_additional_info : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO); + // layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO, r32ui) uniform uimage2D rw_optical_flow_additional_info; + //#endif + //#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL + // RWTexture2D rw_optical_flow_additional_info_next_level : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL); + // layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL, r32ui) uniform uimage2D rw_optical_flow_additional_info_next_level; + //#endif + //#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM + // RWTexture2D rw_optical_flow_histogram : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM); + // layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM, r32ui) uniform uimage2D rw_optical_flow_histogram; + //#endif + //#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH + // globallycoherent RWTexture2D rw_optical_flow_global_motion_search: FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH); + // layout 
(set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH, r32ui) coherent uniform uimage2D rw_optical_flow_global_motion_search; + //#endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM, r32ui) uniform uimage2D rw_optical_flow_scd_histogram; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM, r32f) uniform image2D rw_optical_flow_scd_previous_histogram; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP, r32ui) uniform uimage2D rw_optical_flow_scd_temp; + #endif + #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT + layout (set = 0, binding = FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT, r32ui) uniform uimage2D rw_optical_flow_scd_output; + #endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_INPUT_COLOR) +FfxFloat32x4 LoadInputColor(FfxUInt32x2 iPxHistory) +{ + return texelFetch(r_input_color, FfxInt32x2(iPxHistory), 0); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_INPUT_MOTION_VECTORS) +FfxFloat32x2 LoadGameMotionVector(FfxInt32x2 iPxPos) +{ + FfxFloat32x2 positionScale = FfxFloat32x2(RenderSize()) / DisplaySize(); + return texelFetch(r_input_motion_vectors, FfxInt32x2(iPxPos * positionScale), 0).xy * motionVectorScale / positionScale; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT) +void StoreOpticalFlowInput(FfxInt32x2 iPxPos, FfxUInt32 fLuma) +{ + imageStore(rw_optical_flow_input, iPxPos, FfxUInt32x4(fLuma, 0, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT) +FfxUInt32 LoadOpticalFlowInput(FfxInt32x2 iPxPos) +{ +#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1 + return max(1, texelFetch(r_optical_flow_input, iPxPos, 0).x); +#else + return 
texelFetch(r_optical_flow_input, iPxPos, 0).x; +#endif +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT) +FfxUInt32 LoadRwOpticalFlowInput(FfxInt32x2 iPxPos) +{ + return imageLoad(rw_optical_flow_input, iPxPos).x; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT) +FfxUInt32 LoadOpticalFlowPreviousInput(FfxInt32x2 iPxPos) +{ +#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1 + return max(1, texelFetch(r_optical_flow_previous_input, iPxPos, 0).x); +#else + return texelFetch(r_optical_flow_previous_input, iPxPos, 0).x; +#endif +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW) +FfxInt32x2 LoadOpticalFlow(FfxInt32x2 iPxPos) +{ + return texelFetch(r_optical_flow, iPxPos, 0).xy; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW) +FfxInt32x2 LoadRwOpticalFlow(FfxInt32x2 iPxPos) +{ + return imageLoad(rw_optical_flow, iPxPos).xy; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS) +FfxInt32x2 LoadPreviousOpticalFlow(FfxInt32x2 iPxPos) +{ + return texelFetch(r_optical_flow_previous, iPxPos, 0).xy; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW) +void StoreOpticalFlow(FfxInt32x2 iPxPos, FfxInt32x2 motionVector) +{ + imageStore(rw_optical_flow, iPxPos, FfxInt32x4(motionVector, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_NEXT_LEVEL) +void StoreOpticalFlowNextLevel(FfxInt32x2 iPxPos, FfxInt32x2 motionVector) +{ + imageStore(rw_optical_flow_next_level, iPxPos, FfxInt32x4(motionVector, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO) +FfxUInt32x2 LoadOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos) +{ + return texelFetch(r_optical_flow_additional_info, iPxPos, 0).xy; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO) +FfxUInt32x2 LoadRwOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos) +{ + return imageLoad(rw_optical_flow_additional_info, iPxPos).xy; +} +#endif + +#if 
defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO_PREVIOUS) +FfxUInt32x2 LoadPreviousOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos) +{ + return texelFetch(r_optical_flow_additional_info_previous, iPxPos, 0).xy; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO) +void StoreOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos, FfxUInt32x2 additionalInfo) +{ + imageStore(rw_optical_flow_additional_info, iPxPos, FfxUInt32x4(additionalInfo, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL) +void StoreOpticalFlowNextLevelAdditionalInfo(FfxInt32x2 iPxPos, FfxUInt32x2 additionalInfo) +{ + imageStore(rw_optical_flow_additional_info_next_level, iPxPos, FfxUInt32x4(additionalInfo, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_HISTOGRAM) +FfxUInt32 LoadOpticalFlowHistogram(FfxInt32x2 iBucketId) +{ + return texelFetch(r_optical_flow_histogram, iBucketId, 0).x; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM) +void AtomicIncrementOpticalFlowHistogram(FfxInt32x2 iBucketId) +{ + imageAtomicAdd(rw_optical_flow_histogram, iBucketId, 1); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH) +FfxInt32x2 LoadGlobalMotionVector() +{ + FfxInt32 vx = FfxInt32(texelFetch(r_optical_flow_global_motion_search, FfxInt32x2(0, 0), 0).x); + FfxInt32 vy = FfxInt32(texelFetch(r_optical_flow_global_motion_search, FfxInt32x2(1, 0), 0).x); + return FfxInt32x2(vx, vy); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH) +FfxInt32x2 LoadRwGlobalMotionVector() +{ + FfxInt32 vx = FfxInt32(imageLoad(rw_optical_flow_global_motion_search, FfxInt32x2(0, 0)).x); + FfxInt32 vy = FfxInt32(imageLoad(rw_optical_flow_global_motion_search, FfxInt32x2(1, 0)).x); + return FfxInt32x2(vx, vy); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH) +FfxUInt32 LoadGlobalMotionValue(FfxInt32 
index) +{ + return imageLoad(rw_optical_flow_global_motion_search, FfxInt32x2(index, 0)).x; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH) +void StoreGlobalMotionValue(FfxInt32 index, FfxUInt32 value) +{ + imageStore(rw_optical_flow_global_motion_search, FfxInt32x2(index, 0), FfxUInt32x4(value, 0, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH) +FfxUInt32 AtomicIncrementGlobalMotionValue(FfxInt32 index) +{ + return imageAtomicAdd(rw_optical_flow_global_motion_search, FfxInt32x2(index, 0), 1); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM) +FfxUInt32 LoadRwSCDHistogram(FfxInt32 iIndex) +{ + return imageLoad(rw_optical_flow_scd_histogram, FfxInt32x2(iIndex, 0)).x; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM) +void StoreSCDHistogram(FfxInt32 iIndex, FfxUInt32 value) +{ + imageStore(rw_optical_flow_scd_histogram, FfxInt32x2(iIndex, 0), FfxUInt32x4(value, 0, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM) +void AtomicIncrementSCDHistogram(FfxInt32 iIndex, FfxUInt32 valueToAdd) +{ + imageAtomicAdd(rw_optical_flow_scd_histogram, FfxInt32x2(iIndex, 0), valueToAdd); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM) +FfxFloat32 LoadRwSCDPreviousHistogram(FfxInt32 iIndex) +{ + return imageLoad(rw_optical_flow_scd_previous_histogram, FfxInt32x2(iIndex, 0)).x; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM) +void StoreSCDPreviousHistogram(FfxInt32 iIndex, FfxFloat32 value) +{ + imageStore(rw_optical_flow_scd_previous_histogram, FfxInt32x2(iIndex, 0), FfxFloat32x4(value, 0.0, 0.0, 0.0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP) +FfxUInt32 LoadRwSCDTemp(FfxInt32 iIndex) +{ + return imageLoad(rw_optical_flow_scd_temp, FfxInt32x2(iIndex, 0)).x; +} +#endif + +#if 
defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP) +void AtomicIncrementSCDTemp(FfxInt32 iIndex, FfxUInt32 valueToAdd) +{ + imageAtomicAdd(rw_optical_flow_scd_temp, FfxInt32x2(iIndex, 0), valueToAdd); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP) +void ResetSCDTemp() +{ + imageStore(rw_optical_flow_scd_temp, FfxInt32x2(0, 0), FfxUInt32x4(0, 0, 0, 0)); + imageStore(rw_optical_flow_scd_temp, FfxInt32x2(1, 0), FfxUInt32x4(0, 0, 0, 0)); + imageStore(rw_optical_flow_scd_temp, FfxInt32x2(2, 0), FfxUInt32x4(0, 0, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT) +FfxUInt32 LoadRwSCDOutput(FfxInt32 iIndex) +{ + return imageLoad(rw_optical_flow_scd_output, FfxInt32x2(iIndex, 0)).x; +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT) +void StoreSCDOutput(FfxInt32 iIndex, FfxUInt32 value) +{ + imageStore(rw_optical_flow_scd_output, FfxInt32x2(iIndex, 0), FfxUInt32x4(value, 0, 0, 0)); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT) +FfxUInt32 AtomicIncrementSCDOutput(FfxInt32 iIndex, FfxUInt32 valueToAdd) +{ + return imageAtomicAdd(rw_optical_flow_scd_output, FfxInt32x2(iIndex, 0), valueToAdd); +} +#endif + +//#if defined(FFX_OPTICALFLOW_BIND_UAV_DEBUG_VISUALIZATION) +//void StoreDebugVisualization(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) +//{ +// imageStore(rw_debug_visualization, iPxPos, FfxFloat32x4(fColor, 1.f)); +//} +//#endif + +#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT) +FfxFloat32 GetSceneChangeValue() // 1.0 while FrameIndex() <= 5; afterwards ffxAsFloat() of the value stored at SCD_OUTPUT_SCENE_CHANGE_SLOT +{ + if (FrameIndex() <= 5) + return 1.0; + else + return ffxAsFloat(LoadRwSCDOutput(SCD_OUTPUT_SCENE_CHANGE_SLOT)); +} + +FfxBoolean IsSceneChanged() // true while FrameIndex() <= 5; afterwards true when any of the low four bits stored at SCD_OUTPUT_HISTORY_BITS_SLOT is set +{ + if (FrameIndex() <= 5) + { + return true; + } + else + { + return (LoadRwSCDOutput(SCD_OUTPUT_HISTORY_BITS_SLOT) & 0xfu) != 0; + } +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT) +FfxUInt32 LoadFirstImagePackedLuma(FfxInt32x2 iPxPos) // loads four horizontally adjacent luma texels (position clamped to the current pyramid level's extent) and hands them to GetPackedLuma() +{ + const FfxInt32
lumaTextureWidth = DisplaySize().x >> OpticalFlowPyramidLevel(); + const FfxInt32 lumaTextureHeight = DisplaySize().y >> OpticalFlowPyramidLevel(); + + FfxInt32x2 adjustedPos = FfxInt32x2( + ffxClamp(iPxPos.x, 0, lumaTextureWidth - 4), + ffxClamp(iPxPos.y, 0, lumaTextureHeight - 1) + ); + + FfxUInt32 luma0 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(0, 0)); + FfxUInt32 luma1 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(1, 0)); + FfxUInt32 luma2 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(2, 0)); + FfxUInt32 luma3 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(3, 0)); + + return GetPackedLuma(lumaTextureWidth, iPxPos.x, luma0, luma1, luma2, luma3); +} +#endif + +#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT) +FfxUInt32 LoadSecondImagePackedLuma(FfxInt32x2 iPxPos) +{ + const FfxInt32 lumaTextureWidth = DisplaySize().x >> OpticalFlowPyramidLevel(); + const FfxInt32 lumaTextureHeight = DisplaySize().y >> OpticalFlowPyramidLevel(); + + FfxInt32x2 adjustedPos = FfxInt32x2( + ffxClamp(iPxPos.x, 0, lumaTextureWidth - 4), + ffxClamp(iPxPos.y, 0, lumaTextureHeight - 1) + ); + + FfxUInt32 luma0 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(0, 0)); + FfxUInt32 luma1 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(1, 0)); + FfxUInt32 luma2 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(2, 0)); + FfxUInt32 luma3 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(3, 0)); + + return GetPackedLuma(lumaTextureWidth, iPxPos.x, luma0, luma1, luma2, luma3); +} +#endif + + +void SPD_SetMipmap(FfxInt32x2 iPxPos, FfxUInt32 index, FfxFloat32 value) +{ + FfxUInt32x4 value4 = FfxUInt32x4(value, 0.0, 0.0, 0.0); + switch (index) + { + case 0: +#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1 + imageStore(rw_optical_flow_input_level_1, iPxPos, value4); +#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1 + break; + case 1: +#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2 + 
imageStore(rw_optical_flow_input_level_2, iPxPos, value4); +#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2 + break; + case 2: +#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3 + imageStore(rw_optical_flow_input_level_3, iPxPos, value4); +#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3 + break; + case 3: +#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4 + imageStore(rw_optical_flow_input_level_4, iPxPos, value4); +#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4 + break; + case 4: +#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5 + imageStore(rw_optical_flow_input_level_5, iPxPos, value4); +#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5 + break; + case 5: +#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6 + imageStore(rw_optical_flow_input_level_6, iPxPos, value4); +#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6 + break; + } +} + +#endif // #if defined(FFX_GPU) + +#endif // FFX_OPTICALFLOW_CALLBACKS_HLSL_H diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_callbacks_hlsl.h new file mode 100644 index 000000000000..bd1d48493a85 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_callbacks_hlsl.h @@ -0,0 +1,634 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_OPTICALFLOW_CALLBACKS_HLSL_H +#define FFX_OPTICALFLOW_CALLBACKS_HLSL_H + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#define FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION 1 +#define FFX_OPTICALFLOW_FIX_TOP_LEFT_BIAS 1 +#define FFX_OPTICALFLOW_USE_HEURISTICS 1 +#define FFX_OPTICALFLOW_BLOCK_SIZE 8 +#define FFX_LOCAL_SEARCH_FALLBACK 1 + +// perf optimization for h/w not supporting accelerated msad4() +#if !defined(FFX_PREFER_WAVE64) && defined(FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION) +#undef FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION +#endif // without FFX_PREFER_WAVE64 the macro is left undefined, so the `#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1` checks further down evaluate false + +#include "opticalflow/ffx_opticalflow_common.h" + +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // FFX_PREFER_WAVE64 is only forced on here, after the MSAD4 check above has seen the caller's setting + + +#pragma warning(disable: 3205) // conversion from larger type to smaller + +#define DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_OPTICALFLOW_DECLARE_SRV(regIndex) register(DECLARE_SRV_REGISTER(regIndex)) +#define FFX_OPTICALFLOW_DECLARE_UAV(regIndex) register(DECLARE_UAV_REGISTER(regIndex)) +#define FFX_OPTICALFLOW_DECLARE_CB(regIndex) register(DECLARE_CB_REGISTER(regIndex)) + +#if defined(FFX_OPTICALFLOW_BIND_CB_COMMON) + cbuffer cbOF : FFX_OPTICALFLOW_DECLARE_CB(FFX_OPTICALFLOW_BIND_CB_COMMON) + { + FfxInt32x2 iInputLumaResolution; + FfxUInt32 uOpticalFlowPyramidLevel; + FfxUInt32 uOpticalFlowPyramidLevelCount; + + FfxUInt32 iFrameIndex; + FfxUInt32 backbufferTransferFunction; + FfxFloat32x2 minMaxLuminance; + }; // eight 32-bit values in total; presumably what FFX_OPTICALFLOW_CONSTANT_BUFFER_1_SIZE below counts - confirm +#define FFX_OPTICALFLOW_CONSTANT_BUFFER_1_SIZE 8 + +#endif //FFX_OPTICALFLOW_BIND_CB_COMMON + +#if defined(FFX_OPTICALFLOW_BIND_CB_SPD) +cbuffer cbOF_SPD : FFX_OPTICALFLOW_DECLARE_CB(FFX_OPTICALFLOW_BIND_CB_SPD) { + + FfxUInt32 mips; + FfxUInt32 numWorkGroups; +
FfxUInt32x2 workGroupOffset; + FfxUInt32 numWorkGroupOpticalFlowInputPyramid; + FfxUInt32 pad0_; + FfxUInt32 pad1_; + FfxUInt32 pad2_; +}; + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroupOpticalFlowInputPyramid; +} +#endif //FFX_OPTICALFLOW_BIND_CB_SPD + +#define FFX_OPTICALFLOW_CONSTANT_BUFFER_2_SIZE 8 + +#define FFX_OPTICALFLOW_DESCRIPTOR_COUNT 32 + +#define FFX_OPTICALFLOW_ROOTSIG_STRINGIFY(p) FFX_OPTICALFLOW_ROOTSIG_STR(p) +#define FFX_OPTICALFLOW_ROOTSIG_STR(p) #p +#define FFX_OPTICALFLOW_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_OPTICALFLOW_ROOTSIG_STRINGIFY(FFX_OPTICALFLOW_DESCRIPTOR_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_OPTICALFLOW_ROOTSIG_STRINGIFY(FFX_OPTICALFLOW_DESCRIPTOR_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#define FFX_OPTICALFLOW_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_OPTICALFLOW_ROOTSIG_STRINGIFY(FFX_OPTICALFLOW_DESCRIPTOR_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_OPTICALFLOW_ROOTSIG_STRINGIFY(FFX_OPTICALFLOW_DESCRIPTOR_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \ + "StaticSampler(s1, filter = 
FILTER_MIN_MAG_MIP_LINEAR, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] +#if defined(FFX_OPTICALFLOW_EMBED_ROOTSIG) +#define FFX_OPTICALFLOW_EMBED_ROOTSIG_CONTENT FFX_OPTICALFLOW_ROOTSIG +#define FFX_OPTICALFLOW_EMBED_CB2_ROOTSIG_CONTENT FFX_OPTICALFLOW_CB2_ROOTSIG +#else +#define FFX_OPTICALFLOW_EMBED_ROOTSIG_CONTENT +#define FFX_OPTICALFLOW_EMBED_CB2_ROOTSIG_CONTENT +#endif // #if FFX_OPTICALFLOW_EMBED_ROOTSIG + +FfxInt32x2 DisplaySize() +{ + return iInputLumaResolution; +} + +FfxUInt32 FrameIndex() +{ + return iFrameIndex; +} + +FfxUInt32 BackbufferTransferFunction() +{ + return backbufferTransferFunction; +} + +FfxFloat32x2 MinMaxLuminance() +{ + return minMaxLuminance; +} + +FfxBoolean CrossedSceneChangeThreshold(FfxFloat32 sceneChangeValue) +{ + return sceneChangeValue > 0.45f; +} + +FfxUInt32 OpticalFlowPyramidLevel() +{ + return uOpticalFlowPyramidLevel; +} + +FfxUInt32 OpticalFlowPyramidLevelCount() +{ + return uOpticalFlowPyramidLevelCount; +} + +FfxInt32x2 OpticalFlowHistogramMaxVelocity() +{ + const FfxInt32 searchRadius = 8; + FfxInt32 scale = FfxInt32(1) << (OpticalFlowPyramidLevelCount() - 1 - OpticalFlowPyramidLevel()); + FfxInt32 maxVelocity = searchRadius * scale; + return FfxInt32x2(maxVelocity, maxVelocity); +} + + #if defined FFX_OPTICALFLOW_BIND_SRV_INPUT_COLOR + Texture2D r_input_color : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_INPUT_COLOR); + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_INPUT_MOTION_VECTORS + Texture2D r_input_motion_vectors : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_INPUT_MOTION_VECTORS); + #endif + #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT + Texture2D r_optical_flow_input : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT); + #endif + #if defined 
FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT
+        Texture2D<FfxUInt32> r_optical_flow_previous_input : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT); // element types restored from the Load*/Store* accessor signatures
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW
+        Texture2D<FfxInt32x2> r_optical_flow : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS
+        Texture2D<FfxInt32x2> r_optical_flow_previous : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO
+        Texture2D<FfxUInt32x2> r_optical_flow_additional_info : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO_PREVIOUS
+        Texture2D<FfxUInt32x2> r_optical_flow_additional_info_previous : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO_PREVIOUS);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_HISTOGRAM
+        Texture2D<FfxUInt32> r_optical_flow_histogram : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_HISTOGRAM);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH
+        Texture2D<FfxUInt32> r_optical_flow_global_motion_search : FFX_OPTICALFLOW_DECLARE_SRV(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH);
+    #endif
+
+    // UAV declarations (an untyped RWTexture2D is a float4 resource, which InterlockedAdd and the integer accessors cannot use)
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT
+        RWTexture2D<FfxUInt32> rw_optical_flow_input : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_input_level_1 : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1); // NOTE(review): level_1..6 element type assumed equal to level 0 (rw_optical_flow_input) - confirm against upstream
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_input_level_2 : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_input_level_3 : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_input_level_4 : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_input_level_5 : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_input_level_6 : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW
+        RWTexture2D<FfxInt32x2> rw_optical_flow : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_NEXT_LEVEL
+        RWTexture2D<FfxInt32x2> rw_optical_flow_next_level : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_NEXT_LEVEL);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO
+        RWTexture2D<FfxUInt32x2> rw_optical_flow_additional_info : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL
+        RWTexture2D<FfxUInt32x2> rw_optical_flow_additional_info_next_level : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM
+        RWTexture2D<FfxUInt32> rw_optical_flow_histogram : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH
+        globallycoherent RWTexture2D<FfxUInt32> rw_optical_flow_global_motion_search: FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM
+        RWTexture2D<FfxUInt32> rw_optical_flow_scd_histogram : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM
+        RWTexture2D<FfxFloat32> rw_optical_flow_scd_previous_histogram : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP
+        RWTexture2D<FfxUInt32> rw_optical_flow_scd_temp : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP);
+    #endif
+    #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT
+        RWTexture2D<FfxUInt32> rw_optical_flow_scd_output : FFX_OPTICALFLOW_DECLARE_UAV(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT);
+    #endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_INPUT_COLOR)
+FfxFloat32x4 LoadInputColor(FfxUInt32x2 iPxHistory) // Reads one texel of r_input_color.
+{
+    return r_input_color[iPxHistory];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_INPUT_MOTION_VECTORS)
+FfxFloat32x2 LoadGameMotionVector(FfxInt32x2 iPxPos) // Reads the game motion vector for iPxPos and rescales it.
+{
+    FfxFloat32x2 positionScale = FfxFloat32x2(RenderSize()) / DisplaySize(); // ratio used both to index the texture and to rescale the result
+    return r_input_motion_vectors[iPxPos * positionScale] * motionVectorScale / positionScale;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT)
+void StoreOpticalFlowInput(FfxInt32x2 iPxPos, FfxUInt32 fLuma) // Writes one luma value to rw_optical_flow_input.
+{
+    rw_optical_flow_input[iPxPos] = fLuma;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT)
+FfxUInt32 LoadOpticalFlowInput(FfxInt32x2 iPxPos) // Reads one luma value; never returns 0 on the MSAD4 path.
+{
+#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1
+    return max(1, r_optical_flow_input[iPxPos]); // presumably because msad4 ignores zero reference bytes - confirm
+#else
+    return r_optical_flow_input[iPxPos];
+#endif
+}
+#endif
+
+#if 
defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT)
+FfxUInt32 LoadRwOpticalFlowInput(FfxInt32x2 iPxPos) // Reads the luma input through its UAV binding.
+{
+    return rw_optical_flow_input[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT)
+FfxUInt32 LoadOpticalFlowPreviousInput(FfxInt32x2 iPxPos) // Reads r_optical_flow_previous_input; never returns 0 on the MSAD4 path.
+{
+#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1
+    return max(1, r_optical_flow_previous_input[iPxPos]);
+#else
+    return r_optical_flow_previous_input[iPxPos];
+#endif
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW)
+FfxInt32x2 LoadOpticalFlow(FfxInt32x2 iPxPos) // Reads one flow vector through the SRV binding.
+{
+    return r_optical_flow[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW)
+FfxInt32x2 LoadRwOpticalFlow(FfxInt32x2 iPxPos) // Reads one flow vector through the UAV binding.
+{
+    return rw_optical_flow[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS)
+FfxInt32x2 LoadPreviousOpticalFlow(FfxInt32x2 iPxPos) // Reads one vector from r_optical_flow_previous.
+{
+    return r_optical_flow_previous[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW)
+void StoreOpticalFlow(FfxInt32x2 iPxPos, FfxInt32x2 motionVector) // Writes one flow vector to rw_optical_flow.
+{
+    rw_optical_flow[iPxPos] = motionVector;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_NEXT_LEVEL)
+void StoreOpticalFlowNextLevel(FfxInt32x2 iPxPos, FfxInt32x2 motionVector) // Writes one flow vector to rw_optical_flow_next_level.
+{
+    rw_optical_flow_next_level[iPxPos] = motionVector;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO)
+FfxUInt32x2 LoadOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos) // Reads the additional-info pair through the SRV binding.
+{
+    return r_optical_flow_additional_info[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO)
+FfxUInt32x2 LoadRwOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos) // Reads the additional-info pair through the UAV binding.
+{
+    return rw_optical_flow_additional_info[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_ADDITIONAL_INFO_PREVIOUS)
+FfxUInt32x2 LoadPreviousOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos) // Reads r_optical_flow_additional_info_previous.
+{
+    return r_optical_flow_additional_info_previous[iPxPos];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO)
+void StoreOpticalFlowAdditionalInfo(FfxInt32x2 iPxPos, FfxUInt32x2 additionalInfo) // Writes rw_optical_flow_additional_info.
+{
+    rw_optical_flow_additional_info[iPxPos] = additionalInfo;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_ADDITIONAL_INFO_NEXT_LEVEL)
+void StoreOpticalFlowNextLevelAdditionalInfo(FfxInt32x2 iPxPos, FfxUInt32x2 additionalInfo) // Writes rw_optical_flow_additional_info_next_level.
+{
+    rw_optical_flow_additional_info_next_level[iPxPos] = additionalInfo;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_HISTOGRAM)
+FfxUInt32 LoadOpticalFlowHistogram(FfxInt32x2 iBucketId) // Reads one histogram bucket through the SRV binding.
+{
+    return r_optical_flow_histogram[iBucketId];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_HISTOGRAM)
+void AtomicIncrementOpticalFlowHistogram(FfxInt32x2 iBucketId) // Atomically adds 1 to one histogram bucket.
+{
+    InterlockedAdd(rw_optical_flow_histogram[iBucketId], 1);
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH)
+FfxInt32x2 LoadGlobalMotionVector() // Returns texels (0,0) and (1,0) of the global-motion-search SRV as a signed (x, y) pair.
+{
+    FfxInt32 vx = FfxInt32(r_optical_flow_global_motion_search[FfxInt32x2(0, 0)]);
+    FfxInt32 vy = FfxInt32(r_optical_flow_global_motion_search[FfxInt32x2(1, 0)]);
+    return FfxInt32x2(vx, vy);
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH)
+FfxInt32x2 LoadRwGlobalMotionVector() // Same as LoadGlobalMotionVector(), read through the UAV binding.
+{
+    FfxInt32 vx = FfxInt32(rw_optical_flow_global_motion_search[FfxInt32x2(0, 0)]);
+    FfxInt32 vy = FfxInt32(rw_optical_flow_global_motion_search[FfxInt32x2(1, 0)]);
+    return FfxInt32x2(vx, vy);
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH)
+FfxUInt32 LoadGlobalMotionValue(FfxInt32 index) // Reads texel (index, 0) of the global-motion-search UAV.
+{
+    return rw_optical_flow_global_motion_search[FfxInt32x2(index, 0)];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH)
+void StoreGlobalMotionValue(FfxInt32 index, FfxUInt32 value) // Writes texel (index, 0) of the global-motion-search UAV.
+{
+    rw_optical_flow_global_motion_search[FfxInt32x2(index, 0)] = value;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_GLOBAL_MOTION_SEARCH)
+FfxUInt32 AtomicIncrementGlobalMotionValue(FfxInt32 index) // Atomically adds 1 to texel (index, 0) and returns the value it held before.
+{
+    FfxUInt32 initialValue;
+    InterlockedAdd(rw_optical_flow_global_motion_search[FfxInt32x2(index, 0)], 1, initialValue);
+    return initialValue;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM)
+FfxUInt32 LoadRwSCDHistogram(FfxInt32 iIndex) // Reads texel (iIndex, 0) of the SCD histogram.
+{
+    return rw_optical_flow_scd_histogram[FfxInt32x2(iIndex, 0)];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM)
+void StoreSCDHistogram(FfxInt32 iIndex, FfxUInt32 value) // Writes texel (iIndex, 0) of the SCD histogram.
+{
+    rw_optical_flow_scd_histogram[FfxInt32x2(iIndex, 0)] = value;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_HISTOGRAM)
+void AtomicIncrementSCDHistogram(FfxInt32 iIndex, FfxUInt32 valueToAdd) // Atomically adds valueToAdd to texel (iIndex, 0).
+{
+    InterlockedAdd(rw_optical_flow_scd_histogram[FfxInt32x2(iIndex, 0)], valueToAdd);
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM)
+FfxFloat32 LoadRwSCDPreviousHistogram(FfxInt32 iIndex) // Reads texel (iIndex, 0) of the previous (float) histogram.
+{
+    return rw_optical_flow_scd_previous_histogram[FfxInt32x2(iIndex, 0)];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM)
+void StoreSCDPreviousHistogram(FfxInt32 iIndex, FfxFloat32 value) // Writes texel (iIndex, 0) of the previous (float) histogram.
+{
+    rw_optical_flow_scd_previous_histogram[FfxInt32x2(iIndex, 0)] = value;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP)
+FfxUInt32 LoadRwSCDTemp(FfxInt32 iIndex) // Reads texel (iIndex, 0) of the SCD temp buffer.
+{
+    return rw_optical_flow_scd_temp[FfxInt32x2(iIndex, 0)];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP)
+void AtomicIncrementSCDTemp(FfxInt32 iIndex, FfxUInt32 valueToAdd) // Atomically adds valueToAdd to texel (iIndex, 0) of the SCD temp buffer.
+{
+    InterlockedAdd(rw_optical_flow_scd_temp[FfxInt32x2(iIndex, 0)], valueToAdd);
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_TEMP)
+void ResetSCDTemp() // Zeroes the first three texels of the SCD temp buffer.
+{
+    rw_optical_flow_scd_temp[FfxInt32x2(0, 0)] = 0;
+    rw_optical_flow_scd_temp[FfxInt32x2(1, 0)] = 0;
+    rw_optical_flow_scd_temp[FfxInt32x2(2, 
0)] = 0;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT)
+FfxUInt32 LoadRwSCDOutput(FfxInt32 iIndex) // Reads slot iIndex (texel (iIndex, 0)) of the SCD output buffer.
+{
+    return rw_optical_flow_scd_output[FfxInt32x2(iIndex, 0)];
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT)
+void StoreSCDOutput(FfxInt32 iIndex, FfxUInt32 value) // Writes slot iIndex of the SCD output buffer.
+{
+    rw_optical_flow_scd_output[FfxInt32x2(iIndex, 0)] = value;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT)
+FfxUInt32 AtomicIncrementSCDOutput(FfxInt32 iIndex, FfxUInt32 valueToAdd) // Atomically adds valueToAdd to slot iIndex and returns the value it held before.
+{
+    FfxUInt32 initialValue;
+    InterlockedAdd(rw_optical_flow_scd_output[FfxInt32x2(iIndex, 0)], valueToAdd, initialValue);
+    return initialValue;
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_DEBUG_VISUALIZATION)
+void StoreDebugVisualization(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor) // Writes fColor with alpha 1 to the debug visualization target.
+{
+    rw_debug_visualization[iPxPos] = FfxFloat32x4(fColor, 1.f);
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_SCD_OUTPUT)
+FfxFloat32 GetSceneChangeValue() // Returns 1.0 while FrameIndex() <= 5, otherwise the stored scene-change value reinterpreted as float.
+{
+    if (FrameIndex() <= 5)
+        return 1.0;
+    else
+        return ffxAsFloat(LoadRwSCDOutput(SCD_OUTPUT_SCENE_CHANGE_SLOT));
+}
+
+FfxBoolean IsSceneChanged() // True while FrameIndex() <= 5, or when any of the low four history bits is set.
+{
+    if (FrameIndex() <= 5)
+    {
+        return true; // was the float literal 1.0; return a boolean from a FfxBoolean function
+    }
+    else
+    {
+        return (LoadRwSCDOutput(SCD_OUTPUT_HISTORY_BITS_SLOT) & 0xfu) != 0;
+    }
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_INPUT)
+FfxUInt32 LoadFirstImagePackedLuma(FfxInt32x2 iPxPos) // Loads four horizontally adjacent lumas and packs them via GetPackedLuma.
+{
+    const FfxInt32 lumaTextureWidth = DisplaySize().x >> OpticalFlowPyramidLevel();
+    const FfxInt32 lumaTextureHeight = DisplaySize().y >> OpticalFlowPyramidLevel();
+
+    FfxInt32x2 adjustedPos = FfxInt32x2( // clamp so that all four reads stay inside the level
+        ffxClamp(iPxPos.x, 0, lumaTextureWidth - 4),
+        ffxClamp(iPxPos.y, 0, lumaTextureHeight - 1)
+    );
+
+    FfxUInt32 luma0 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(0, 0));
+    FfxUInt32 luma1 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(1, 0));
+    FfxUInt32 luma2 = LoadOpticalFlowInput(adjustedPos + FfxInt32x2(2, 0));
+    FfxUInt32 luma3 = LoadOpticalFlowInput(adjustedPos 
+ FfxInt32x2(3, 0));
+
+    return GetPackedLuma(lumaTextureWidth, iPxPos.x, luma0, luma1, luma2, luma3); // the unclamped x is passed so out-of-range positions can be padded
+}
+#endif
+
+#if defined(FFX_OPTICALFLOW_BIND_SRV_OPTICAL_FLOW_PREVIOUS_INPUT)
+FfxUInt32 LoadSecondImagePackedLuma(FfxInt32x2 iPxPos) // Same as LoadFirstImagePackedLuma, reading via LoadOpticalFlowPreviousInput.
+{
+    const FfxInt32 lumaTextureWidth = DisplaySize().x >> OpticalFlowPyramidLevel();
+    const FfxInt32 lumaTextureHeight = DisplaySize().y >> OpticalFlowPyramidLevel();
+
+    FfxInt32x2 adjustedPos = FfxInt32x2( // clamp so that all four reads stay inside the level
+        ffxClamp(iPxPos.x, 0, lumaTextureWidth - 4),
+        ffxClamp(iPxPos.y, 0, lumaTextureHeight - 1)
+    );
+
+    FfxUInt32 luma0 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(0, 0));
+    FfxUInt32 luma1 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(1, 0));
+    FfxUInt32 luma2 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(2, 0));
+    FfxUInt32 luma3 = LoadOpticalFlowPreviousInput(adjustedPos + FfxInt32x2(3, 0));
+
+    return GetPackedLuma(lumaTextureWidth, iPxPos.x, luma0, luma1, luma2, luma3);
+}
+#endif
+
+
+void SPD_SetMipmap(int2 iPxPos, int index, float value) // Writes value to pyramid level (index + 1); unbound levels and other indices are ignored.
+{
+    switch (index)
+    {
+    case 0:
+#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1
+        rw_optical_flow_input_level_1[iPxPos] = value;
+#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_1
+        break;
+    case 1:
+#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2
+        rw_optical_flow_input_level_2[iPxPos] = value;
+#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_2
+        break;
+    case 2:
+#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3
+        rw_optical_flow_input_level_3[iPxPos] = value;
+#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_3
+        break;
+    case 3:
+#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4
+        rw_optical_flow_input_level_4[iPxPos] = value;
+#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_4
+        break;
+    case 4:
+#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5
+        rw_optical_flow_input_level_5[iPxPos] = value;
+#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_5
+        break;
+    case 5:
+#if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6
+        rw_optical_flow_input_level_6[iPxPos] = value;
+#endif // #if defined FFX_OPTICALFLOW_BIND_UAV_OPTICAL_FLOW_INPUT_LEVEL_6
+        break;
+    }
+}
+
+#endif // #if defined(FFX_GPU)
+
+#endif // FFX_OPTICALFLOW_CALLBACKS_HLSL_H
diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_common.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_common.h
new file mode 100644
index 000000000000..907e7ac796fe
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_common.h
@@ -0,0 +1,99 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#if !defined(FFX_OPTICALFLOW_COMMON_H)
+#define FFX_OPTICALFLOW_COMMON_H
+
+#if defined(FFX_GPU)
+
+#define SCD_OUTPUT_SCENE_CHANGE_SLOT 0 // slot indices passed to LoadRwSCDOutput / StoreSCDOutput
+#define SCD_OUTPUT_HISTORY_BITS_SLOT 1
+#define SCD_OUTPUT_COMPLETED_WORKGROUPS_SLOT 2
+
+
+#define ffxClamp(x, a, b) (ffxMax(a, ffxMin(b, x))) // clamps x to [a, b]; arguments are evaluated as macro text
+
+
+FfxUInt32 GetPackedLuma(FfxInt32 width, FfxInt32 x, FfxUInt32 luma0, FfxUInt32 luma1, FfxUInt32 luma2, FfxUInt32 luma3) // Packs four lumas into one word (luma0 in the low byte) and pads positions outside [0, width) with the edge value.
+{
+    FfxUInt32 packedLuma = luma0 | (luma1 << 8) | (luma2 << 16) | (luma3 << 24);
+
+    if (x < 0)
+    {
+        FfxUInt32 outOfScreenFiller = packedLuma & 0xffu; // left-most loaded luma
+        if (x <= -1)
+            packedLuma = (packedLuma << 8) | outOfScreenFiller; // one shift per position left of the image, up to three
+        if (x <= -2)
+            packedLuma = (packedLuma << 8) | outOfScreenFiller;
+        if (x <= -3)
+            packedLuma = (packedLuma << 8) | outOfScreenFiller;
+    }
+    else if (x > width - 4)
+    {
+        FfxUInt32 outOfScreenFiller = packedLuma & 0xff000000u; // right-most loaded luma, kept in the top byte
+        if (x >= width - 3)
+            packedLuma = (packedLuma >> 8) | outOfScreenFiller; // one shift per position right of the image, up to three
+        if (x >= width - 2)
+            packedLuma = (packedLuma >> 8) | outOfScreenFiller;
+        if (x >= width - 1)
+            packedLuma = (packedLuma >> 8) | outOfScreenFiller;
+    }
+    return packedLuma;
+}
+
+FfxUInt32 Sad(FfxUInt32 a, FfxUInt32 b) // Sum of absolute differences between the four bytes of a and the four bytes of b.
+{
+#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1
+    return msad4(a, FfxUInt32x2(b, 0), FfxUInt32x4(0, 0, 0, 0)).x;
+#else
+    return abs(FfxInt32((a >> 0) & 0xffu) - FfxInt32((b >> 0) & 0xffu)) +
+           abs(FfxInt32((a >> 8) & 0xffu) - FfxInt32((b >> 8) & 0xffu)) +
+           abs(FfxInt32((a >> 16) & 0xffu) - FfxInt32((b >> 16) & 0xffu)) +
+           abs(FfxInt32((a >> 24) & 0xffu) - FfxInt32((b >> 24) & 0xffu));
+#endif
+}
+
+FfxUInt32x4 QSad(FfxUInt32 a0, FfxUInt32 a1, FfxUInt32 b) // Four SADs of b against the 4-byte windows at byte offsets 0..3 of the 8-byte value a1:a0.
+{
+#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1
+    return msad4(b, FfxUInt32x2(a0, a1), FfxUInt32x4(0, 0, 0, 0));
+#else
+    FfxUInt32x4 sad;
+    sad.x = Sad(a0, b);
+
+    a0 = (a0 >> 8) | ((a1 & 0xffu) << 24); // slide the window one byte: drop the low byte of a0, pull in the low byte of a1
+    a1 >>= 8;
+    sad.y = Sad(a0, b);
+
+    a0 = (a0 >> 8) | ((a1 & 0xffu) << 24);
+    a1 >>= 8;
+    sad.z = Sad(a0, b);
+
+    a0 = (a0 >> 8) | ((a1 & 0xffu) << 24);
+    sad.w = Sad(a0, b);
+    return sad;
+#endif
+}
+
+#endif // #if defined(FFX_GPU)
+
+#endif //!defined(FFX_OPTICALFLOW_COMMON_H)
diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_luminance_pyramid.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_luminance_pyramid.h
new file mode 100644
index 000000000000..a51f1edbd736
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_luminance_pyramid.h
@@ -0,0 +1,107 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_OPTICALFLOW_COMPUTE_LUMINANCE_PYRAMID_H
+#define FFX_OPTICALFLOW_COMPUTE_LUMINANCE_PYRAMID_H
+
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16]; // per-channel scratch used by SpdLoadIntermediate / SpdStoreIntermediate
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
+
+void SPD_IncreaseAtomicCounter(inout FfxUInt32 spdCounter) // Intentionally empty: no atomic counter is kept here.
+{
+}
+
+void SPD_ResetAtomicCounter() // Intentionally empty.
+{
+}
+
+FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice) // Returns the level-0 luma at tex in .x; the other channels are 0 and slice is ignored.
+{
+    FfxFloat32 luma = LoadRwOpticalFlowInput(FfxInt32x2(tex));
+    return FfxFloat32x4(luma, 0, 0, 0);
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) // Always returns zero.
+{
+    return FfxFloat32x4(0, 0, 0, 0);
+}
+
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice) // Writes outValue.r to the pyramid level selected by index (see SPD_SetMipmap).
+{
+    SPD_SetMipmap(pix, index, outValue.r);
+}
+
+void SpdIncreaseAtomicCounter(FfxUInt32 slice) // Intentionally empty.
+{
+}
+
+FfxUInt32 SpdGetAtomicCounter() // Always 0, matching the empty counter functions above.
+{
+    return 0;
+}
+
+void SpdResetAtomicCounter(FfxUInt32 slice) // Forwards to the (empty) SPD_ResetAtomicCounter.
+{
+    SPD_ResetAtomicCounter();
+}
+
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) // Reads one RGBA value from the group-shared scratch arrays.
+{
+    return FfxFloat32x4(
+        spdIntermediateR[x][y],
+        spdIntermediateG[x][y],
+        spdIntermediateB[x][y],
+        spdIntermediateA[x][y]);
+}
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) // Writes one RGBA value to the group-shared scratch arrays.
+{
+    spdIntermediateR[x][y] = value.x;
+    spdIntermediateG[x][y] = value.y;
+    spdIntermediateB[x][y] = value.z;
+    spdIntermediateA[x][y] = value.w;
+}
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) // Reduction operator: arithmetic mean of the four inputs.
+{
+    return (v0 + v1 + v2 + v3) * 0.25;
+}
+
+#ifdef FFX_HALF
+#undef FFX_HALF
+#endif
+
+// https://github.com/GPUOpen-Effects/FidelityFX-SPD/blob/master/docs/FidelityFX_SPD.pdf
+#include "../spd/ffx_spd.h"
+
+void ComputeOpticalFlowInputPyramid(FfxInt32x2 iGroupId, FfxInt32 iLocalIndex) // Entry point: runs SpdDownsample for this group using the callbacks defined above.
+{
+    SpdDownsample(
+        FfxUInt32x2(iGroupId.xy),
+        FfxUInt32(iLocalIndex),
+        6, // mip levels to generate
+        FfxUInt32(NumWorkGroups()),
+        1 // single slice
+    );
+}
+
+#endif // FFX_OPTICALFLOW_COMPUTE_LUMINANCE_PYRAMID_H diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_optical_flow_v5.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_optical_flow_v5.h new file mode 100644 index 000000000000..9c1b522b6dbc --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_optical_flow_v5.h @@ -0,0 +1,279 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_OPTICALFLOW_COMPUTE_OPTICAL_FLOW_V5_H
+#define FFX_OPTICALFLOW_COMPUTE_OPTICAL_FLOW_V5_H
+
+#define CompareSize (4 * 2)
+#define BlockSizeY 8
+#define BlockSizeX 8
+#define ThreadCount (4 * 16)
+#define SearchRadiusX (8)
+#define SearchRadiusY (8)
+#define BlockCount 2
+
+#define SearchBufferSizeX ((CompareSize + SearchRadiusX*2)/4) // in packed words of four lumas
+#define SearchBufferSizeY (CompareSize + SearchRadiusY*2)
+
+FFX_GROUPSHARED FfxUInt32 pixels[CompareSize][CompareSize / 4]; // packed lumas of the block being matched
+FFX_GROUPSHARED FfxUInt32 searchBuffer[1][SearchBufferSizeY * SearchBufferSizeX]; // packed lumas of the search window, row-major
+#define bankBreaker 1
+FFX_GROUPSHARED FfxUInt32 sadMapBuffer[4][SearchRadiusY * 2][(SearchRadiusX * 2) / 4 + bankBreaker]; // (sad << 16) | encoded coordinate per candidate
+
+#define MaxWaves 2
+FFX_GROUPSHARED FfxUInt32 sWaveSad[MaxWaves]; // per-wave partial results, used only when the wave has 32 lanes
+FFX_GROUPSHARED FfxUInt32 sWaveMin[MaxWaves];
+
+FfxUInt32 BlockSad64(FfxUInt32 blockSadSum, FfxInt32 iLocalIndex, FfxInt32 iLaneToBlockId, FfxInt32 block) // Sums blockSadSum over the lanes whose iLaneToBlockId equals block.
+{
+    if (iLaneToBlockId != block)
+    {
+        blockSadSum = 0u;
+    }
+    blockSadSum = ffxWaveSum(blockSadSum);
+
+    if (ffxWaveLaneCount() == 32) // 64 threads span two 32-lane waves: combine with the partner wave
+    {
+        FfxInt32 waveId = iLocalIndex >> 5u;
+        if (ffxWaveIsFirstLane())
+        {
+            sWaveSad[waveId] = blockSadSum;
+        }
+        FFX_GROUP_MEMORY_BARRIER;
+        blockSadSum += sWaveSad[waveId ^ 1];
+    }
+
+    return blockSadSum;
+}
+
+FfxUInt32 SadMapMinReduction256(FfxInt32x2 iSearchId, FfxInt32 iLocalIndex) // Minimum over all sadMapBuffer entries: four per thread, then across the wave(s).
+{
+    FfxUInt32 min01 = ffxMin(sadMapBuffer[0][iSearchId.y][iSearchId.x], sadMapBuffer[1][iSearchId.y][iSearchId.x]);
+    FfxUInt32 min23 = ffxMin(sadMapBuffer[2][iSearchId.y][iSearchId.x], sadMapBuffer[3][iSearchId.y][iSearchId.x]);
+    FfxUInt32 min0123 = ffxMin(min01, min23);
+    min0123 = ffxWaveMin(min0123);
+
+    if (ffxWaveLaneCount() == 32) // combine with the partner wave, as in BlockSad64
+    {
+        FfxInt32 waveId = iLocalIndex >> 5u;
+
+        if (ffxWaveIsFirstLane())
+        {
+            sWaveMin[waveId] = min0123;
+        }
+        FFX_GROUP_MEMORY_BARRIER;
+        min0123 = ffxMin(min0123, sWaveMin[waveId ^ 1]);
+    }
+
+    return min0123;
+}
+
+void LoadSearchBuffer(FfxInt32 iLocalIndex, FfxInt32x2 iPxPosShifted) // Fills searchBuffer with packed second-image lumas starting at iPxPosShifted minus the search radius; ends with a group barrier.
+{
+    FfxInt32 baseX = (iPxPosShifted.x - SearchRadiusX);
+    FfxInt32 baseY = (iPxPosShifted.y - SearchRadiusY);
+
+    for (FfxInt32 id = iLocalIndex; id < SearchBufferSizeX * SearchBufferSizeY; id += ThreadCount) // the group's threads stride over the buffer
+    {
+        FfxInt32 idx = id % SearchBufferSizeX;
+        FfxInt32 idy = id / SearchBufferSizeX;
+        FfxInt32 x = baseX + idx * 4; // each entry holds four horizontally adjacent lumas
+        FfxInt32 y = baseY + idy;
+        searchBuffer[0][id] = LoadSecondImagePackedLuma(FfxInt32x2(x, y));
+    }
+    FFX_GROUP_MEMORY_BARRIER;
+}
+
+FfxUInt32x4 CalculateQSads2(FfxInt32x2 iSearchId) // SADs of the whole pixels[] block against searchBuffer at row offset iSearchId.y and byte offsets iSearchId.x * 4 + 0..3.
+{
+    FfxUInt32x4 sad = ffxBroadcast4(0u);
+
+#if FFX_OPTICALFLOW_USE_MSAD4_INSTRUCTION == 1
+
+    FfxInt32 idx = iSearchId.y * 6 + iSearchId.x; // 6 equals SearchBufferSizeX for the current defines; rows are unrolled by hand below
+
+    sad = msad4(pixels[0][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[0][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[1][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[1][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[2][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[2][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[3][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[3][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[4][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[4][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[5][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[5][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[6][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[6][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+    idx += 6;
+    sad = msad4(pixels[7][0], FfxUInt32x2(searchBuffer[0][idx], searchBuffer[0][idx + 1]), sad);
+    sad = msad4(pixels[7][1], FfxUInt32x2(searchBuffer[0][idx + 1], searchBuffer[0][idx + 2]), sad);
+
+#else
+    for (FfxInt32 dy = 0; dy < CompareSize; dy++)
+    {
+        FfxInt32 rowOffset = (iSearchId.y + dy) * SearchBufferSizeX;
+        FfxUInt32 a0 = searchBuffer[0][rowOffset + iSearchId.x];
+        FfxUInt32 a1 = searchBuffer[0][rowOffset + iSearchId.x + 1];
+        FfxUInt32 a2 = searchBuffer[0][rowOffset + iSearchId.x + 2];
+        sad += QSad(a0, a1, pixels[dy][0]);
+        sad += QSad(a1, a2, pixels[dy][1]);
+    }
+#endif
+
+    return sad;
+}
+
+FfxUInt32x2 abs_2(FfxInt32x2 val) // Component-wise absolute value computed with a sign mask.
+{
+    FfxInt32x2 tmp = val;
+    FfxInt32x2 mask = tmp >> 31; // all ones for negative components, zero otherwise
+    FfxUInt32x2 res = (tmp + mask) ^ mask;
+    return res;
+}
+
+FfxUInt32 EncodeSearchCoord(FfxInt32x2 coord) // Packs a search-window coordinate into the low 16 bits stored next to a SAD in sadMapBuffer.
+{
+#if FFX_OPTICALFLOW_FIX_TOP_LEFT_BIAS == 1
+    FfxUInt32x2 absCoord = FfxUInt32x2(abs_2(coord - 8)); // placed in the higher bits so that, for equal SADs, the min reduction prefers candidates nearest the window centre
+    return FfxUInt32(absCoord.y << 12) | FfxUInt32(absCoord.x << 8) | FfxUInt32(coord.y << 4) | FfxUInt32(coord.x);
+#else //FFX_OPTICALFLOW_FIX_TOP_LEFT_BIAS == 1
+    return FfxUInt32(coord.y << 8) | FfxUInt32(coord.x);
+#endif //FFX_OPTICALFLOW_FIX_TOP_LEFT_BIAS == 1
+}
+
+FfxInt32x2 DecodeSearchCoord(FfxUInt32 bits) // Inverse of EncodeSearchCoord: returns the coordinate minus the search radius; higher bits are ignored.
+{
+#if FFX_OPTICALFLOW_FIX_TOP_LEFT_BIAS == 1
+    FfxInt32 dx = FfxInt32(bits & 0xfu) - SearchRadiusX;
+    FfxInt32 dy = FfxInt32((bits >> 4) & 0xfu) - SearchRadiusY;
+
+    return FfxInt32x2(dx, dy);
+#else
+    FfxInt32 dx = FfxInt32(bits & 0xffu) - SearchRadiusX;
+    FfxInt32 dy = FfxInt32((bits >> 8) & 0xffu) - SearchRadiusY;
+
+    return FfxInt32x2(dx, dy);
+#endif
+}
+
+void PrepareSadMap(FfxInt32x2 iSearchId, FfxUInt32x4 qsad) // Stores this thread's four (sad << 16) | coordinate keys, then issues a group barrier.
+{
+    sadMapBuffer[0][iSearchId.y][iSearchId.x] = (qsad.x << 16) | EncodeSearchCoord(FfxInt32x2(iSearchId.x * 4 + 0, iSearchId.y));
+    sadMapBuffer[1][iSearchId.y][iSearchId.x] = (qsad.y << 16) | EncodeSearchCoord(FfxInt32x2(iSearchId.x * 4 + 1, iSearchId.y));
+    sadMapBuffer[2][iSearchId.y][iSearchId.x] = (qsad.z << 16) | EncodeSearchCoord(FfxInt32x2(iSearchId.x * 4 + 2, iSearchId.y));
+    sadMapBuffer[3][iSearchId.y][iSearchId.x] = (qsad.w << 16) | EncodeSearchCoord(FfxInt32x2(iSearchId.x * 4 + 3, iSearchId.y));
+    FFX_GROUP_MEMORY_BARRIER;
+}
+
+
+uint ABfe(uint src, uint off, uint bits) { uint mask = (1u << bits) - 1u; return (src >> off) & mask; } // extracts `bits` bits of src starting at bit `off`
+uint ABfi(uint src, uint ins, uint mask) { return (ins & mask) | (src & (~mask)); } // takes the masked bits from ins and the rest from src
+uint ABfiM(uint src, uint ins, uint bits) { uint mask = (1u << bits) - 1u; return (ins & mask) | (src & (~mask)); } // same, with a mask of the low `bits` bits
+void MapThreads(in FfxInt32x2 iGroupId, in FfxInt32 iLocalIndex, // Derives per-thread ids from the bits of iLocalIndex.
+                out FfxInt32x2 iSearchId, out FfxInt32x2 iPxPos, out FfxInt32 iLaneToBlockId)
+{
+    iSearchId = FfxInt32x2(ABfe(iLocalIndex, 0u, 2u), ABfe(iLocalIndex, 2u, 4u)); // x = bits 0-1, y = bits 2-5
+    iLaneToBlockId = FfxInt32(ABfe(iLocalIndex, 1u, 1u) | (ABfe(iLocalIndex, 5u, 1u) << 1u)); // bit 1 and bit 5 select one of four blocks
+    iPxPos = (iGroupId << 4u) + iSearchId * FfxInt32x2(4, 1); // each thread owns four horizontally adjacent pixels
+}
+
+void ComputeOpticalFlowAdvanced(FfxInt32x2 iGlobalId, FfxInt32x2 iLocalId, FfxInt32x2 iGroupId, FfxInt32 iLocalIndex) // Entry point: block-matches the BlockCount x BlockCount blocks of this group and stores one vector per block; iGlobalId and iLocalId are not read.
+{
+    FfxInt32x2 iSearchId;
+    FfxInt32x2 iPxPos;
+    FfxInt32 iLaneToBlockId;
+    MapThreads(iGroupId, iLocalIndex, iSearchId, iPxPos, iLaneToBlockId);
+
+    FfxInt32x2 currentOFPos = iPxPos >> 3u;
+
+    if (IsSceneChanged()) // on a scene change, write zero vectors and skip the search
+    {
+        if ((iSearchId.y & 0x7) == 0 && (iSearchId.x & 0x1) == 0) // one writer per block
+        {
+            StoreOpticalFlow(currentOFPos, FfxInt32x2(0, 0));
+        }
+
+        return;
+    }
+
+    const FfxBoolean bUsePredictionFromPreviousLevel = (OpticalFlowPyramidLevel() != OpticalFlowPyramidLevelCount() - 1);
+
+    FfxUInt32 packedLuma_4blocks = LoadFirstImagePackedLuma(iPxPos);
+
+#if FFX_LOCAL_SEARCH_FALLBACK == 1
+    FfxUInt32 prevPackedLuma_4blocks = LoadSecondImagePackedLuma(iPxPos);
+    FfxUInt32 sad_4blocks = Sad(packedLuma_4blocks, prevPackedLuma_4blocks); // SAD at zero displacement, compared against the best match below
+#endif //FFX_LOCAL_SEARCH_FALLBACK
+
+    FfxInt32x2 ofGroupOffset = iGroupId << 1u;
+    FfxInt32x2 pixelGroupOffset = iGroupId << 4u;
+
+    FfxInt32x2 blockId;
+    for (blockId.y = 0; blockId.y < BlockCount; blockId.y++)
+    {
+        for (blockId.x = 0; blockId.x < BlockCount; blockId.x++)
+        {
+            FfxInt32x2 currentVector = LoadRwOpticalFlow(ofGroupOffset + blockId); // value already present in the flow UAV, used as the search start
+            if (!bUsePredictionFromPreviousLevel)
+            {
+                currentVector = FfxInt32x2(0, 0);
+            }
+
+            if (iLaneToBlockId == blockId.y * 2 + blockId.x) // only the lanes owning this block publish their pixels
+            {
+                pixels[iSearchId.y & 0x7][iSearchId.x & 0x1] = packedLuma_4blocks;
+            }
+
+            LoadSearchBuffer(iLocalIndex, pixelGroupOffset + blockId * 8 + currentVector); // its trailing barrier also makes pixels[] visible
+
+            FfxUInt32x4 qsad = CalculateQSads2(iSearchId);
+
+            PrepareSadMap(iSearchId, qsad);
+            FfxUInt32 minSad = SadMapMinReduction256(iSearchId, iLocalIndex);
+
+            FfxInt32x2 minSadCoord = DecodeSearchCoord(minSad);
+            FfxInt32x2 newVector = currentVector + minSadCoord;
+
+#if FFX_LOCAL_SEARCH_FALLBACK == 1
+            FfxUInt32 blockSadSum = BlockSad64(sad_4blocks, iLocalIndex, iLaneToBlockId, blockId.x + blockId.y * 2);
+            if (OpticalFlowPyramidLevel() == 0 && blockSadSum <= (minSad >> 16u)) // at level 0, keep a zero vector when it matches at least as well
+            {
+                newVector = FfxInt32x2(0, 0);
+            }
+#endif //FFX_LOCAL_SEARCH_FALLBACK
+
+            {
+                StoreOpticalFlow(ofGroupOffset + blockId, newVector);
+            }
+        }
+    }
+}
+
+#endif // FFX_OPTICALFLOW_COMPUTE_OPTICAL_FLOW_V5_H
diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_scd_divergence.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_scd_divergence.h
new file mode 100644
index 000000000000..7f473f657a5b
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_compute_scd_divergence.h
@@ -0,0 +1,159 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+
+#ifndef FFX_OPTICALFLOW_COMPUTE_SCD_DIVERGENCE_H
+#define FFX_OPTICALFLOW_COMPUTE_SCD_DIVERGENCE_H
+
+FFX_GROUPSHARED FfxFloat32 sourceHistogram[256];
+FFX_GROUPSHARED FfxFloat32 filteredHistogram[256];
+FFX_GROUPSHARED FfxFloat32 tempBuffer[256];
+FFX_GROUPSHARED FfxFloat32x2 tempBuffer2[256];
+
+void ComputeSCDHistogramsDivergence(FfxInt32x3 iGlobalId, FfxInt32x2 iLocalId, FfxInt32 iLocalIndex, FfxInt32x2 iGroupId, FfxInt32x2 iGroupSize) // Entry point: filters and normalises one 256-bin histogram, compares it with the previous one, and lets the last finishing group publish the scene-change result.
+{
+    FFX_STATIC const FfxFloat32 Factor = 1000000.0; // fixed-point scale for accumulating float results with integer atomics
+    FFX_STATIC const FfxInt32 WhereToStop = 3*9 - 1; // previous completed-group count seen by the last of 27 groups
+    FFX_STATIC const FfxInt32 HistogramCount = 3 * 3;
+
+    FFX_STATIC const FfxFloat32 Kernel[] = { // first six taps of a symmetric 11-tap kernel (mirrored when applied below)
+        0.0088122291, 0.027143577, 0.065114059, 0.12164907, 0.17699835, 0.20056541
+    };
+
+    sourceHistogram[iLocalIndex] = FfxFloat32(LoadRwSCDHistogram(iGlobalId.x));
+    FFX_GROUP_MEMORY_BARRIER;
+
+    const FfxInt32 kernelShift = -5;
+    const FfxInt32 indexToRead = iLocalIndex + kernelShift;
+
+    FfxFloat32 val = 0.0;
+    val += Kernel[0] * sourceHistogram[ffxClamp(indexToRead + 0, 0, 255)]; // indices are clamped, so edge bins are repeated
+    val += Kernel[1] * sourceHistogram[ffxClamp(indexToRead + 1, 0, 255)];
+    val += Kernel[2] * sourceHistogram[ffxClamp(indexToRead + 2, 0, 255)];
+    val += Kernel[3] * sourceHistogram[ffxClamp(indexToRead + 3, 0, 255)];
+    val += Kernel[4] * sourceHistogram[ffxClamp(indexToRead + 4, 0, 255)];
+    val += Kernel[5] * sourceHistogram[ffxClamp(indexToRead + 5, 0, 255)];
+    val += Kernel[4] * sourceHistogram[ffxClamp(indexToRead + 6, 0, 255)];
+    val += Kernel[3] * sourceHistogram[ffxClamp(indexToRead + 7, 0, 255)];
+    val += Kernel[2] * sourceHistogram[ffxClamp(indexToRead + 8, 0, 255)];
+    val += Kernel[1] * sourceHistogram[ffxClamp(indexToRead + 9, 0, 255)];
+    val += Kernel[0] * sourceHistogram[ffxClamp(indexToRead + 10, 0, 255)];
+
+    val += 1.0; // presumably keeps every bin non-zero so the log() ratios below stay finite - confirm
+
+    if (iGlobalId.y == 0) // y selects the variant: shifted one bin down (0), unshifted (1), shifted one bin up (2)
+    {
+        if (iLocalIndex == 0)
+            filteredHistogram[255] = 1.0;
+        else
+            filteredHistogram[iLocalIndex - 1] = val;
+    }
+    else if (iGlobalId.y == 1)
+    {
+        filteredHistogram[iLocalIndex] = val;
+    }
+    else if (iGlobalId.y == 2)
+    {
+        if (iLocalIndex == 255)
+            filteredHistogram[0] = 1.0;
+        else
+            filteredHistogram[iLocalIndex + 1] = val;
+    }
+    FFX_GROUP_MEMORY_BARRIER;
+
+    tempBuffer[iLocalIndex] = filteredHistogram[iLocalIndex]; // sum reduction of the filtered histogram into tempBuffer[0]
+    FFX_GROUP_MEMORY_BARRIER;
+
+    if (iLocalIndex < 128) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 128];
+    FFX_GROUP_MEMORY_BARRIER;
+
+    if (iLocalIndex < 64) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 64];
+    FFX_GROUP_MEMORY_BARRIER;
+
+    if (iLocalIndex < 32) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 32]; // NOTE(review): no barriers from here down; looks like it relies on these threads running in lockstep - confirm
+    if (iLocalIndex < 16) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 16];
+    if (iLocalIndex < 8 ) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 8];
+    if (iLocalIndex < 4 ) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 4];
+    if (iLocalIndex < 2 ) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 2];
+    if (iLocalIndex < 1 ) tempBuffer[iLocalIndex] += tempBuffer[iLocalIndex + 1];
+    FFX_GROUP_MEMORY_BARRIER;
+
+    filteredHistogram[iLocalIndex] /= tempBuffer[0]; // normalise so the bins sum to 1
+
+    FfxFloat32 currentFilteredHistogramsValue = filteredHistogram[iLocalIndex];
+    FfxFloat32 previousHistogramsValue = LoadRwSCDPreviousHistogram(iGlobalId.x);
+
+    tempBuffer2[iLocalIndex] = FfxFloat32x2( // per-bin terms p*log(p/q) and q*log(q/p), the summands of the two directed Kullback-Leibler divergences
+        currentFilteredHistogramsValue * log(currentFilteredHistogramsValue / previousHistogramsValue),
+        previousHistogramsValue * log(previousHistogramsValue / currentFilteredHistogramsValue)
+    );
+    FFX_GROUP_MEMORY_BARRIER;
+
+    if (iLocalIndex < 128) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 128];
+    FFX_GROUP_MEMORY_BARRIER;
+
+    if (iLocalIndex < 64) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 64];
+    FFX_GROUP_MEMORY_BARRIER;
+
+    if (iLocalIndex < 32) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 32]; // NOTE(review): same barrier-free tail as the reduction above - confirm
+    if (iLocalIndex < 16) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 16];
+    if (iLocalIndex < 8 ) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 8];
+    if (iLocalIndex < 4 ) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 4];
+    if (iLocalIndex < 2 ) tempBuffer2[iLocalIndex] += tempBuffer2[iLocalIndex + 2];
+
+    if (iLocalIndex == 0)
+    {
+        FfxFloat32x2 sum = tempBuffer2[0] + tempBuffer2[1]; // last reduction step, done by thread 0 alone
+
+        FfxFloat32 resFloat = 1 - exp(-(abs(sum.x) + abs(sum.y))); // maps the summed divergences into [0, 1)
+        FfxUInt32 resUInt = FfxUInt32((resFloat / FfxFloat32(HistogramCount)) * Factor);
+        AtomicIncrementSCDTemp(iGlobalId.y, resUInt); // accumulated per shift variant
+
+        FfxUInt32 oldFinishedGroupCount = AtomicIncrementSCDOutput(SCD_OUTPUT_COMPLETED_WORKGROUPS_SLOT, 1);
+        if (oldFinishedGroupCount == WhereToStop) // only the last group to finish publishes the result
+        {
+            FfxUInt32 res0 = LoadRwSCDTemp(0);
+            FfxUInt32 res1 = LoadRwSCDTemp(1);
+            FfxUInt32 res2 = LoadRwSCDTemp(2);
+            FfxFloat32 sceneChangeValue = ffxMin(res0, ffxMin(res1, res2)) / Factor; // smallest of the three shift variants
+
+            FfxUInt32 history = LoadRwSCDOutput(SCD_OUTPUT_HISTORY_BITS_SLOT) << 1; // shift the history; bit 0 is this frame
+            if (CrossedSceneChangeThreshold(sceneChangeValue))
+            {
+                history |= 1;
+            }
+            StoreSCDOutput(SCD_OUTPUT_SCENE_CHANGE_SLOT, ffxAsUInt32(sceneChangeValue));
+            StoreSCDOutput(SCD_OUTPUT_HISTORY_BITS_SLOT, history);
+            StoreSCDOutput(SCD_OUTPUT_COMPLETED_WORKGROUPS_SLOT, 0); // reset the counter and accumulators
+
+            ResetSCDTemp();
+        }
+    }
+
+    if (iGlobalId.y == 1) // only the unshifted variant saves its histogram and clears the source bin
+    {
+        StoreSCDPreviousHistogram(iGlobalId.x, currentFilteredHistogramsValue);
+
+        StoreSCDHistogram(iGlobalId.x, 0);
+    }
+}
+
+#endif // FFX_OPTICALFLOW_COMPUTE_SCD_DIVERGENCE_H
diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_filter_optical_flow_v5.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_filter_optical_flow_v5.h
new file mode 100644
index 000000000000..8ebf0beb1786
--- /dev/null
+++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_filter_optical_flow_v5.h
@@ -0,0 +1,58 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (C) 2024 Advanced Micro Devices, Inc.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_OPTICALFLOW_FILTER_OPTICAL_FLOW_V5_H +#define FFX_OPTICALFLOW_FILTER_OPTICAL_FLOW_V5_H + +void FilterOpticalFlow(FfxInt32x2 iGlobalId, FfxInt32x2 iLocalId, FfxInt32x2 iGroupId, FfxInt32 iLocalIndex) +{ + FfxInt32x2 tmpMV[9]; + FfxInt32 idx = 0; + for (FfxInt32 xx = -1; xx < 2; xx++) + { + for (FfxInt32 yy = -1; yy < 2; yy++) + { + + tmpMV[idx] = LoadPreviousOpticalFlow(iGlobalId + FfxInt32x2(xx, yy)); + idx++; + } + } + + FfxUInt32 ret = 0xFFFFFFFF; + for (FfxInt32 i = 0; i < 9; ++i) + { + FfxUInt32 tmp = 0; + for (FfxInt32 j = 0; j < 9; ++j) + { + FfxInt32x2 delta = tmpMV[i] - tmpMV[j]; + tmp = delta.x * delta.x + (delta.y * delta.y + tmp); + } + + ret = min(((tmp) << 4) | i, ret); + } + + FfxUInt32 minIdx = ret & 0xF; + + StoreOpticalFlow(iGlobalId, tmpMV[minIdx]); +} + +#endif // FFX_OPTICALFLOW_FILTER_OPTICAL_FLOW_V5_H diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_generate_scd_histogram.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_generate_scd_histogram.h new file mode 100644 index 000000000000..aead43868933 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_generate_scd_histogram.h @@ -0,0 +1,92 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_OPTICALFLOW_GENERATE_SCD_HISTOGRAM_H +#define FFX_OPTICALFLOW_GENERATE_SCD_HISTOGRAM_H + +#define LBASE 10 + +FFX_GROUPSHARED FfxUInt32 scdBuffer[256 * LBASE]; + +void GenerateSceneChangeDetectionHistogram(FfxInt32x3 iGlobalId, FfxInt32x2 iLocalId, FfxInt32 iLocalIndex, FfxInt32x2 iGroupId, FfxInt32x2 iGroupSize) +{ + FFX_STATIC const FfxUInt32 HistogramsPerDim = 3; + + FfxUInt32 divX = DisplaySize().x / HistogramsPerDim; + FfxUInt32 divY = DisplaySize().y / HistogramsPerDim; + + FfxUInt32 nx = iGlobalId.z % HistogramsPerDim; + FfxUInt32 ny = iGlobalId.z / HistogramsPerDim; + FfxUInt32 startX = divX * nx; + FfxUInt32 startY = divY * ny; + FfxUInt32 stopX = startX + divX; + FfxUInt32 stopY = startY + divY; + + const FfxUInt32 bufferOffset = iLocalIndex * LBASE; + + for (FfxInt32 i = 0; i < LBASE; i++) + { + scdBuffer[bufferOffset + i] = 0; + } + FFX_GROUP_MEMORY_BARRIER; + + FfxInt32x2 coord = FfxInt32x2(startX + (4 * iGlobalId.x), startY + iGlobalId.y); + if (coord.x < stopX) + { + for (; coord.y < stopY; coord.y += 128) + { + FfxUInt32x4 color = FfxUInt32x4( + LoadOpticalFlowInput(coord + FfxInt32x2(0, 0)), + LoadOpticalFlowInput(coord + FfxInt32x2(1, 0)), + LoadOpticalFlowInput(coord + FfxInt32x2(2, 0)), + LoadOpticalFlowInput(coord + FfxInt32x2(3, 0)) + ); + color *= LBASE; + + FfxUInt32 scramblingOffset = iLocalIndex % LBASE; + +#if defined(FFX_HLSL) + InterlockedAdd(scdBuffer[color.x + scramblingOffset], 1); + InterlockedAdd(scdBuffer[color.y + 
scramblingOffset], 1); + InterlockedAdd(scdBuffer[color.z + scramblingOffset], 1); + InterlockedAdd(scdBuffer[color.w + scramblingOffset], 1); +#elif defined(FFX_GLSL) + atomicAdd(scdBuffer[color.x + scramblingOffset], 1); + atomicAdd(scdBuffer[color.y + scramblingOffset], 1); + atomicAdd(scdBuffer[color.z + scramblingOffset], 1); + atomicAdd(scdBuffer[color.w + scramblingOffset], 1); +#endif + } + } + FFX_GROUP_MEMORY_BARRIER; + + FfxUInt32 value = 0; + for (FfxInt32 i = 0; i < LBASE; i++) + { + value += scdBuffer[bufferOffset + i]; + } + + FfxUInt32 histogramStart = (iGroupSize.x * iGroupSize.y) * iGlobalId.z; + AtomicIncrementSCDHistogram(FfxInt32(histogramStart + iLocalIndex), value); +} + +#endif // FFX_OPTICALFLOW_GENERATE_SCD_HISTOGRAM_H diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_prepare_luma.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_prepare_luma.h new file mode 100644 index 000000000000..57fd8a74b4ab --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_prepare_luma.h @@ -0,0 +1,106 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_OPTICALFLOW_PREPARE_LUMA_H +#define FFX_OPTICALFLOW_PREPARE_LUMA_H + +FfxFloat32 LuminanceToPerceivedLuminance(FfxFloat32 fLuminance) +{ + FfxFloat32 fPercievedLuminance = 0; + if (fLuminance <= 216.0f / 24389.0f) { + fPercievedLuminance = fLuminance * (24389.0f / 27.0f); + } + else { + fPercievedLuminance = ffxPow(fLuminance, 1.0f / 3.0f) * 116.0f - 16.0f; + } + + return fPercievedLuminance * 0.01f; +} + +FfxFloat32 LinearLdrToLuminance(FfxFloat32x3 linearRec709RGB) +{ + FfxFloat32 fY = 0.2126 * linearRec709RGB.x + 0.7152 * linearRec709RGB.y + 0.0722 * linearRec709RGB.z; + return fY; +} + +FfxFloat32 LinearRec2020ToLuminance(FfxFloat32x3 linearRec2020RGB) +{ + FfxFloat32 fY = 0.2627 * linearRec2020RGB.x + 0.678 * linearRec2020RGB.y + 0.0593 * linearRec2020RGB.z; + return fY; +} + +FfxFloat32 PQCorrectedHdrToLuminance(FfxFloat32x3 pq, FfxFloat32 maxLuminance) +{ + FfxFloat32 fY = LinearRec2020ToLuminance(ffxLinearFromPQ(pq) * (10000.0f / maxLuminance)); + return fY; +} + +FfxFloat32x3 ffxscRGBToLinear(FfxFloat32x3 value, FfxFloat32 minLuminance, FfxFloat32 maxLuminance) +{ + FfxFloat32x3 p = value - ffxBroadcast3(minLuminance / 80.0f); + return p / ffxBroadcast3((maxLuminance - minLuminance) / 80.0f); +} + +FfxFloat32 SCRGBCorrectedHdrToLuminance(FfxFloat32x3 scRGB, FfxFloat32 minLuminance, FfxFloat32 maxLuminance) +{ + FfxFloat32 fY = LinearLdrToLuminance(ffxscRGBToLinear(scRGB, minLuminance, maxLuminance)); + return fY; +} + +void PrepareLuma(FfxInt32x2 iGlobalId, FfxInt32 iLocalIndex) +{ +#define PixelsPerThreadX 2 +#define PixelsPerThreadY 2 +#pragma unroll + for (FfxInt32 y = 0; y < PixelsPerThreadY; y++) + { +#pragma unroll + for (FfxInt32 x = 0; x < PixelsPerThreadX; x++) + 
{ + FfxInt32x2 pos = iGlobalId * FfxInt32x2(PixelsPerThreadX, PixelsPerThreadY) + FfxInt32x2(x, y); + FfxInt32x2 iPxHrPos = pos; + FfxFloat32 fY = 0.0; + + FfxFloat32x3 inputColor = LoadInputColor(iPxHrPos).rgb; + + FfxUInt32 backbufferTransferFunction = BackbufferTransferFunction(); + if (backbufferTransferFunction == 0) + { + fY = LinearLdrToLuminance(inputColor); + } + else if (backbufferTransferFunction == 1) + { + fY = PQCorrectedHdrToLuminance(inputColor, MinMaxLuminance()[1]); + fY = LuminanceToPerceivedLuminance(fY); + } + else if (backbufferTransferFunction == 2) + { + fY = SCRGBCorrectedHdrToLuminance(inputColor, MinMaxLuminance()[0], MinMaxLuminance()[1]); + fY = LuminanceToPerceivedLuminance(fY); + } + + StoreOpticalFlowInput(pos, FfxUInt32(fY * 255)); + } + } +} + +#endif // FFX_OPTICALFLOW_PREPARE_LUMA_H diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_resources.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_resources.h new file mode 100644 index 000000000000..4676af409f44 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_resources.h @@ -0,0 +1,74 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_OPTICALFLOW_RESOURCES_H +#define FFX_OPTICALFLOW_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) + +#define FFX_OF_RESOURCE_IDENTIFIER_NULL 0 + +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1 1 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_1 2 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_2 3 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_3 4 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_4 5 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_5 6 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_1_LEVEL_6 7 + +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2 8 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_1 9 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_2 10 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_3 11 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_4 12 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_5 13 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_INPUT_2_LEVEL_6 14 + +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1 15 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_1 16 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_2 17 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_3 18 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_4 19 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_5 20 +#define 
FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_1_LEVEL_6 21 + +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2 22 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_1 23 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_2 24 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_3 25 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_4 26 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_5 27 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_2_LEVEL_6 28 + +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_HISTOGRAM 29 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_PREVIOUS_HISTOGRAM 30 +#define FFX_OF_RESOURCE_IDENTIFIER_OPTICAL_FLOW_SCD_TEMP 31 + +#define FFX_OF_RESOURCE_IDENTIFIER_COUNT 32 + +#define FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER 0 +#define FFX_OPTICALFLOW_CONSTANTBUFFER_IDENTIFIER_SPD 1 +#define FFX_OPTICALFLOW_CONSTANTBUFFER_COUNT 2 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif //!defined( FFX_OPTICALFLOW_RESOURCES_H ) diff --git a/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_scale_optical_flow_advanced_v5.h b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_scale_optical_flow_advanced_v5.h new file mode 100644 index 000000000000..16238f4e6405 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/opticalflow/ffx_opticalflow_scale_optical_flow_advanced_v5.h @@ -0,0 +1,99 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#ifndef FFX_OPTICALFLOW_SCALE_OPTICAL_FLOW_ADVANCED_V5_H +#define FFX_OPTICALFLOW_SCALE_OPTICAL_FLOW_ADVANCED_V5_H + +#define WG_WIDTH FFX_OPTICALFLOW_THREAD_GROUP_WIDTH +#define WG_HEIGHT FFX_OPTICALFLOW_THREAD_GROUP_HEIGHT +#define WG_DEPTH FFX_OPTICALFLOW_THREAD_GROUP_DEPTH +FFX_GROUPSHARED FfxInt32x2 nearestVectors[4][WG_HEIGHT][WG_WIDTH]; +FFX_GROUPSHARED FfxUInt32 localRegion[4][WG_HEIGHT][WG_WIDTH]; +FFX_GROUPSHARED FfxUInt32 sads[4][WG_HEIGHT][WG_WIDTH]; + +void ScaleOpticalFlowAdvanced(FfxInt32x3 iGlobalId, FfxInt32x3 iLocalId) +{ + if (IsSceneChanged()) + { + StoreOpticalFlowNextLevel(iGlobalId.xy, FfxInt32x2(0, 0)); + + return; + } + + int xOffset = (iLocalId.z % 2) - 1 + iGlobalId.x % 2; + int yOffset = (iLocalId.z / 2) - 1 + iGlobalId.y % 2; + + FfxInt32x2 srcOFPos = FfxInt32x2( + (iGlobalId.x / 2) + xOffset, + (iGlobalId.y / 2) + yOffset + ); + + FfxInt32x2 nearestVector = LoadOpticalFlow(srcOFPos); + nearestVectors[iLocalId.z][iLocalId.y][iLocalId.x] = nearestVector * 2; + + int maxY = 4; + for (int n = iLocalId.z; n < maxY; n += WG_DEPTH) + { + { + FfxInt32x2 lumaPos = FfxInt32x2((iGlobalId.x) * 4, iGlobalId.y * maxY + n); + FfxUInt32 firstPixel = LoadFirstImagePackedLuma(lumaPos); + localRegion[n][iLocalId.y][iLocalId.x] = firstPixel; + } + } + FFX_GROUP_MEMORY_BARRIER; + + uint sad = 0; + for (int n = 0; n < maxY; n++) + { + { + FfxInt32x2 lumaPos = FfxInt32x2((iGlobalId.x) * 4, (iGlobalId.y * maxY + n)) + nearestVector; + FfxUInt32 secondPixel = LoadSecondImagePackedLuma(lumaPos); + sad += Sad(localRegion[n][iLocalId.y][iLocalId.x], secondPixel); + } + } + sads[iLocalId.z][iLocalId.y][iLocalId.x] = sad; + + FFX_GROUP_MEMORY_BARRIER; + + { + if (iLocalId.z == 0) + { + uint bestSad = 0xffffffff; + uint bestId = 0; + + for (int n = 0; n < 4; n++) + { + if ((sads[n][iLocalId.y][iLocalId.x]) < bestSad) + { + bestSad = sads[n][iLocalId.y][iLocalId.x]; + bestId = n; + } + } + + FfxInt32x2 outputVector = nearestVectors[bestId][iLocalId.y][iLocalId.x]; 
+ + StoreOpticalFlowNextLevel(iGlobalId.xy, outputVector); + } + } +} + +#endif // FFX_OPTICALFLOW_SCALE_OPTICAL_FLOW_ADVANCED_V5_H diff --git a/thirdparty/amd-fsr2/shaders/ffx_spd.h b/thirdparty/amd-ffx/gpu/spd/ffx_spd.h similarity index 80% rename from thirdparty/amd-fsr2/shaders/ffx_spd.h rename to thirdparty/amd-ffx/gpu/spd/ffx_spd.h index 5ce24ec87cc3..c3ee50f02734 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_spd.h +++ b/thirdparty/amd-ffx/gpu/spd/ffx_spd.h @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -19,22 +20,41 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN // THE SOFTWARE. 
-#ifdef FFX_CPU -FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy - FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant - FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant - FfxUInt32x4 rectInfo, // left, top, width, height - FfxInt32 mips) // optional: if -1, calculate based on rect width and height -{ - workGroupOffset[0] = rectInfo[0] / 64; // rectInfo[0] = left - workGroupOffset[1] = rectInfo[1] / 64; // rectInfo[1] = top +/// @defgroup FfxGPUSpd FidelityFX SPD +/// FidelityFX Single Pass Downsampler 2.0 GPU documentation +/// +/// @ingroup FfxGPUEffects + +/// Setup required constant values for SPD (CPU). +/// +/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. z is number of slices of the input texture +/// @param [out] workGroupOffset GPU side: pass in as constant +/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant +/// @param [in] rectInfo left, top, width, height +/// @param [in] mips optional: if -1, calculate based on rect width and height +/// +/// @ingroup FfxGPUSpd +#if defined(FFX_CPU) +FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, + FfxUInt32x2 workGroupOffset, + FfxUInt32x2 numWorkGroupsAndMips, + FfxUInt32x4 rectInfo, + FfxInt32 mips) +{ + // determines the offset of the first tile to downsample based on + // left (rectInfo[0]) and top (rectInfo[1]) of the subregion. 
+ workGroupOffset[0] = rectInfo[0] / 64; + workGroupOffset[1] = rectInfo[1] / 64; FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64; // rectInfo[0] = left, rectInfo[2] = width FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64; // rectInfo[1] = top, rectInfo[3] = height + // we only need to dispatch as many thread groups as tiles we need to downsample + // number of tiles per slice depends on the subregion to downsample dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; + // number of thread groups per slice numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); if (mips >= 0) @@ -42,29 +62,37 @@ FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side numWorkGroupsAndMips[1] = FfxUInt32(mips); } else - { + { // calculate based on rect width and height FfxUInt32 resolution = ffxMax(rectInfo[2], rectInfo[3]); numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12)))); } } -FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, // CPU side: dispatch thread group count xy - FfxUInt32x2 workGroupOffset, // GPU side: pass in as constant - FfxUInt32x2 numWorkGroupsAndMips, // GPU side: pass in as constant - FfxUInt32x4 rectInfo) // left, top, width, height +/// Setup required constant values for SPD (CPU). +/// +/// @param [out] dispatchThreadGroupCountXY CPU side: dispatch thread group count xy. 
z is number of slices of the input texture +/// @param [out] workGroupOffset GPU side: pass in as constant +/// @param [out] numWorkGroupsAndMips GPU side: pass in as constant +/// @param [in] rectInfo left, top, width, height +/// +/// @ingroup FfxGPUSpd +FFX_STATIC void ffxSpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY, + FfxUInt32x2 workGroupOffset, + FfxUInt32x2 numWorkGroupsAndMips, + FfxUInt32x4 rectInfo) { - SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); + ffxSpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); } -#endif // #ifdef FFX_CPU +#endif // #if defined(FFX_CPU) //============================================================================================================================== // NON-PACKED VERSION //============================================================================================================================== -#ifdef FFX_GPU -#ifdef SPD_PACKED_ONLY -// Avoid compiler error +#if defined(FFX_GPU) +#if defined(FFX_SPD_PACKED_ONLY) +// Avoid compiler errors by including default implementations of these callbacks. 
FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice) { return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); @@ -88,21 +116,16 @@ FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFl { return FfxFloat32x4(0.0, 0.0, 0.0, 0.0); } -#endif // #ifdef SPD_PACKED_ONLY +#endif // #if FFX_SPD_PACKED_ONLY //_____________________________________________________________/\_______________________________________________________________ -#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) +#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) #extension GL_KHR_shader_subgroup_quad:require #endif -void SpdWorkgroupShuffleBarrier() +void ffxSpdWorkgroupShuffleBarrier() { -#ifdef FFX_GLSL - barrier(); -#endif -#ifdef FFX_HLSL - GroupMemoryBarrierWithGroupSync(); -#endif + FFX_GROUP_MEMORY_BARRIER; } // Only last active workgroup should proceed @@ -114,14 +137,14 @@ bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, F SpdIncreaseAtomicCounter(slice); } - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); return (SpdGetAtomicCounter() != (numWorkGroups - 1)); } // User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3); FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) { -#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) +#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) FfxFloat32x4 v0 = v; FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v); @@ -129,14 +152,13 @@ FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v); return SpdReduce4(v0, v1, v2, v3); -#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS) +#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) // requires SM6.0 - FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); - FfxFloat32x4 v0 = v; - FfxFloat32x4 v1 = WaveReadLaneAt(v, quad | 1); - FfxFloat32x4 v2 = WaveReadLaneAt(v, quad | 2); - FfxFloat32x4 v3 = WaveReadLaneAt(v, quad | 3); + 
FfxFloat32x4 v0 = v; + FfxFloat32x4 v1 = QuadReadAcrossX(v); + FfxFloat32x4 v2 = QuadReadAcrossY(v); + FfxFloat32x4 v3 = QuadReadAcrossDiagonal(v); return SpdReduce4(v0, v1, v2, v3); /* // if SM6.0 is not available, you can use the AMD shader intrinsics @@ -199,7 +221,7 @@ FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32 FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice) { -#ifdef SPD_LINEAR_SAMPLER +#if defined(SPD_LINEAR_SAMPLER) return SpdLoadSourceImage(FfxInt32x2(base), slice); #else return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); @@ -284,13 +306,13 @@ void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID for (FfxUInt32 i = 0; i < 4; i++) { SpdStoreIntermediate(x, y, v[i]); - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); if (localInvocationIndex < 64) { v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); } - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); } if (localInvocationIndex < 64) @@ -304,7 +326,7 @@ void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice); #else SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice); @@ -314,7 +336,7 @@ void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, Ff void 
SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 64) { FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); @@ -343,7 +365,7 @@ void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUI void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 16) { // x 0 x 0 @@ -383,7 +405,7 @@ void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUI void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 4) { // x 0 0 0 x 0 0 0 @@ -416,7 +438,7 @@ void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUI void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 1) { // x x x x 0 ... 
@@ -473,48 +495,75 @@ void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, Ff { if (mips <= baseMip) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice); if (mips <= baseMip + 1) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); if (mips <= baseMip + 2) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); if (mips <= baseMip + 3) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice); } +/// Downsamples a 64x64 tile based on the work group id. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// +/// @ingroup FfxGPUSpd void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) { + // compute MIP level 0 and 1 FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice); + // compute MIP level 2, 3, 4, 5 SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice); if (mips <= 6) return; + // increase 
the global atomic counter for the given slice and check if it's the last remaining thread group: + // terminate if not, continue if yes. if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) return; + // reset the global atomic counter back to 0 for the next spd dispatch SpdResetAtomicCounter(slice); - // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // compute MIP level 6 and 7 SpdDownsampleMips_6_7(x, y, mips, slice); + // compute MIP level 8, 9, 10, 11 SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); } - +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. 
+/// +/// @ingroup FfxGPUSpd void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) { SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); @@ -529,25 +578,24 @@ void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxU #if FFX_HALF -#ifdef FFX_GLSL +#if defined(FFX_GLSL) #extension GL_EXT_shader_subgroup_extended_types_float16:require #endif FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v) { -#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS) +#if defined(FFX_GLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) FfxFloat16x4 v0 = v; FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v); FfxFloat16x4 v2 = subgroupQuadSwapVertical(v); FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v); return SpdReduce4H(v0, v1, v2, v3); -#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS) +#elif defined(FFX_HLSL) && !defined(FFX_SPD_NO_WAVE_OPERATIONS) // requires SM6.0 - FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); - FfxFloat16x4 v0 = v; - FfxFloat16x4 v1 = WaveReadLaneAt(v, quad | 1); - FfxFloat16x4 v2 = WaveReadLaneAt(v, quad | 2); - FfxFloat16x4 v3 = WaveReadLaneAt(v, quad | 3); + FfxFloat16x4 v0 = v; + FfxFloat16x4 v1 = QuadReadAcrossX(v); + FfxFloat16x4 v2 = QuadReadAcrossY(v); + FfxFloat16x4 v3 = QuadReadAcrossDiagonal(v); return SpdReduce4H(v0, v1, v2, v3); /* // if SM6.0 is not available, you can use the AMD shader intrinsics @@ -610,7 +658,7 @@ FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt3 FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice) { -#ifdef SPD_LINEAR_SAMPLER +#if defined(SPD_LINEAR_SAMPLER) return SpdLoadSourceImageH(FfxInt32x2(base), slice); #else return SpdReduceLoadSourceImage4H(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice); 
@@ -695,13 +743,13 @@ void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupI for (FfxInt32 i = 0; i < 4; i++) { SpdStoreIntermediateH(x, y, v[i]); - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); if (localInvocationIndex < 64) { v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice); } - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); } if (localInvocationIndex < 64) @@ -715,7 +763,7 @@ void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupI void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice); #else SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice); @@ -725,7 +773,7 @@ void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, F void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 64) { FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1)); @@ -754,7 +802,7 @@ void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxU void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if 
(localInvocationIndex < 16) { // x 0 x 0 @@ -794,7 +842,7 @@ void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxU void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 4) { // x 0 0 0 x 0 0 0 @@ -827,7 +875,7 @@ void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxU void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) { -#ifdef SPD_NO_WAVE_OPERATIONS +#if defined(FFX_SPD_NO_WAVE_OPERATIONS) if (localInvocationIndex < 1) { // x x x x 0 ... @@ -884,53 +932,83 @@ void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, F { if (mips <= baseMip) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice); if (mips <= baseMip + 1) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice); if (mips <= baseMip + 2) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice); if (mips <= baseMip + 3) return; - SpdWorkgroupShuffleBarrier(); + ffxSpdWorkgroupShuffleBarrier(); SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice); } +/// Downsamples a 64x64 tile based on the work group id. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// Uses half types.
+/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// +/// @ingroup FfxGPUSpd void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice) { FfxUInt32x2 sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); FfxUInt32 x = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); FfxUInt32 y = sub_xy.y + 8 * ((localInvocationIndex >> 7)); + // compute MIP level 0 and 1 SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice); + // compute MIP level 2, 3, 4, 5 SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice); if (mips < 7) return; + // increase the global atomic counter for the given slice and check if it's the last remaining thread group: + // terminate if not, continue if yes. if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) return; + // reset the global atomic counter back to 0 for the next spd dispatch SpdResetAtomicCounter(slice); - // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // After mip 5 there is only a single workgroup left that downsamples the remaining up to 64x64 texels. + // compute MIP level 6 and 7 SpdDownsampleMips_6_7H(x, y, mips, slice); + // compute MIP level 8, 9, 10, 11 SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); } +/// Downsamples a 64x64 tile based on the work group id and work group offset. +/// If after downsampling it's the last active thread group, computes the remaining MIP levels. +/// Uses half types. 
+/// +/// @param [in] workGroupID index of the work group / thread group +/// @param [in] localInvocationIndex index of the thread within the thread group in 1D +/// @param [in] mips the number of total MIP levels to compute for the input texture +/// @param [in] numWorkGroups the total number of dispatched work groups / thread groups for this slice +/// @param [in] slice the slice of the input texture +/// @param [in] workGroupOffset the work group offset. it's (0,0) in case the entire input texture is downsampled. +/// +/// @ingroup FfxGPUSpd void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset) { SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice); } #endif // #if FFX_HALF -#endif // #ifdef FFX_GPU +#endif // #if defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/spd/ffx_spd_callbacks_glsl.h b/thirdparty/amd-ffx/gpu/spd/ffx_spd_callbacks_glsl.h new file mode 100644 index 000000000000..55244eb42c77 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/spd/ffx_spd_callbacks_glsl.h @@ -0,0 +1,181 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#include "ffx_spd_resources.h" + +#if defined(FFX_GPU) +#include "ffx_core.h" + +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 + +#if defined(FFX_SPD_BIND_CB_SPD) + layout (set = 0, binding = FFX_SPD_BIND_CB_SPD, std140) uniform cbFSR1_t + { + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxFloat32x2 invInputSize; // Only used for linear sampling mode + FfxFloat32x2 padding; + } cbFSR1; +#endif + + +FfxUInt32 Mips() +{ + return cbFSR1.mips; +} + +FfxUInt32 NumWorkGroups() +{ + return cbFSR1.numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return cbFSR1.workGroupOffset; +} + +FfxFloat32x2 InvInputSize() +{ + return cbFSR1.invInputSize; +} + +layout (set = 0, binding = 1000) uniform sampler s_LinearClamp; + +// SRVs +#if defined FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC + layout (set = 0, binding = FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) uniform texture2DArray r_input_downsample_src; +#endif + +// UAV declarations +#if defined FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC + layout (set = 0, binding = FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC, std430) coherent buffer rw_internal_global_atomic_t + { + FfxUInt32 counter[6]; + } rw_internal_global_atomic; +#endif + +#if defined FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP + layout (set = 0, binding = FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP, rgba32f) coherent uniform image2DArray rw_input_downsample_src_mid_mip; +#endif + +#if defined 
FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS + layout (set = 0, binding = FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS, rgba32f) uniform image2DArray rw_input_downsample_src_mips[SPD_MAX_MIP_LEVELS+1]; +#endif + +#if FFX_HALF + +#if defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + FfxFloat16x4 SampleSrcImageH(FfxFloat32x2 uv, FfxUInt32 slice) + { + FfxFloat32x2 textureCoord = FfxFloat32x2(uv) * InvInputSize() + InvInputSize(); + FfxFloat32x4 result = textureLod(sampler2DArray(r_input_downsample_src, s_LinearClamp), FfxFloat32x3(textureCoord, slice), 0); + return FfxFloat16x4(ffxSrgbFromLinear(result.x), ffxSrgbFromLinear(result.y), ffxSrgbFromLinear(result.z), result.w); + } +#endif // defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + FfxFloat16x4 LoadSrcImageH(FfxFloat32x2 uv, FfxUInt32 slice) + { + return FfxFloat16x4(imageLoad(rw_input_downsample_src_mips[0], FfxInt32x3(uv, slice))); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + void StoreSrcMipH(FfxFloat16x4 value, FfxInt32x2 uv, FfxUInt32 slice, FfxUInt32 mip) + { + imageStore(rw_input_downsample_src_mips[mip], FfxInt32x3(uv, slice), FfxFloat32x4(value)); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + FfxFloat16x4 LoadMidMipH(FfxInt32x2 uv, FfxUInt32 slice) + { + return FfxFloat16x4(imageLoad(rw_input_downsample_src_mid_mip, FfxInt32x3(uv, slice))); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + void StoreMidMipH(FfxFloat16x4 value, FfxInt32x2 uv, FfxUInt32 slice) + { + imageStore(rw_input_downsample_src_mid_mip, FfxInt32x3(uv, slice), FfxFloat32x4(value));\ + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#else // FFX_HALF + +#if 
defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + FfxFloat32x4 SampleSrcImage(FfxInt32x2 uv, FfxUInt32 slice) + { + FfxFloat32x2 textureCoord = FfxFloat32x2(uv) * InvInputSize() + InvInputSize(); + FfxFloat32x4 result = textureLod(sampler2DArray(r_input_downsample_src, s_LinearClamp), FfxFloat32x3(textureCoord, slice), 0); + return FfxFloat32x4(ffxSrgbFromLinear(result.x), ffxSrgbFromLinear(result.y), ffxSrgbFromLinear(result.z), result.w); + } +#endif // defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + FfxFloat32x4 LoadSrcImage(FfxInt32x2 uv, FfxUInt32 slice) + { + return imageLoad(rw_input_downsample_src_mips[0], FfxInt32x3(uv, slice)); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + void StoreSrcMip(FfxFloat32x4 value, FfxInt32x2 uv, FfxUInt32 slice, FfxUInt32 mip) + { + imageStore(rw_input_downsample_src_mips[mip], FfxInt32x3(uv, slice), value); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + FfxFloat32x4 LoadMidMip(FfxInt32x2 uv, FfxUInt32 slice) + { + return imageLoad(rw_input_downsample_src_mid_mip, FfxInt32x3(uv, slice)); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + void StoreMidMip(FfxFloat32x4 value, FfxInt32x2 uv, FfxUInt32 slice) + { + imageStore(rw_input_downsample_src_mid_mip, FfxInt32x3(uv, slice), value); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#endif // FFX_HALF + +#if defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) +void IncreaseAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice, FFX_PARAMETER_INOUT FfxUInt32 counter) +{ + counter = atomicAdd(rw_internal_global_atomic.counter[slice], 1); +} +#endif // defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) + +#if 
defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) +void ResetAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice) +{ + rw_internal_global_atomic.counter[slice] = 0; +} +#endif // defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) + +#endif // #if defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/spd/ffx_spd_callbacks_hlsl.h b/thirdparty/amd-ffx/gpu/spd/ffx_spd_callbacks_hlsl.h new file mode 100644 index 000000000000..d86f66ffbec2 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/spd/ffx_spd_callbacks_hlsl.h @@ -0,0 +1,218 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "ffx_spd_resources.h" + +#if defined(FFX_GPU) +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic push +#pragma dxc diagnostic ignored "-Wambig-lit-shift" +#endif //__hlsl_dx_compiler +#include "ffx_core.h" +#ifdef __hlsl_dx_compiler +#pragma dxc diagnostic pop +#endif //__hlsl_dx_compiler + +#ifndef FFX_PREFER_WAVE64 +#define FFX_PREFER_WAVE64 +#endif // #ifndef FFX_PREFER_WAVE64 + +#pragma warning(disable: 3205) // conversion from larger type to smaller + +#define FFX_DECLARE_SRV_REGISTER(regIndex) t##regIndex +#define FFX_DECLARE_UAV_REGISTER(regIndex) u##regIndex +#define FFX_DECLARE_CB_REGISTER(regIndex) b##regIndex +#define FFX_SPD_DECLARE_SRV(regIndex) register(FFX_DECLARE_SRV_REGISTER(regIndex)) +#define FFX_SPD_DECLARE_UAV(regIndex) register(FFX_DECLARE_UAV_REGISTER(regIndex)) +#define FFX_SPD_DECLARE_CB(regIndex) register(FFX_DECLARE_CB_REGISTER(regIndex)) + +#if defined(FFX_SPD_BIND_CB_SPD) + cbuffer cbSPD : FFX_SPD_DECLARE_CB(FFX_SPD_BIND_CB_SPD) + { + FfxUInt32 mips; + FfxUInt32 numWorkGroups; + FfxUInt32x2 workGroupOffset; + FfxFloat32x2 invInputSize; // Only used for linear sampling mode + FfxFloat32x2 padding; + + #define FFX_SPD_CONSTANT_BUFFER_1_SIZE 8 // Number of 32-bit values. This must be kept in sync with the cbSPD size. 
+ }; +#else + #define mips 0 + #define numWorkGroups 0 + #define workGroupOffset 0 + #define invInputSize 0 + #define padding 0 +#endif + +#define FFX_SPD_ROOTSIG_STRINGIFY(p) FFX_SPD_ROOTSIG_STR(p) +#define FFX_SPD_ROOTSIG_STR(p) #p +#define FFX_SPD_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_SPD_ROOTSIG_STRINGIFY(FFX_SPD_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "DescriptorTable(SRV(t0, numDescriptors = " FFX_SPD_ROOTSIG_STRINGIFY(FFX_SPD_RESOURCE_IDENTIFIER_COUNT) ")), " \ + "CBV(b0), " \ + "StaticSampler(s0, filter = FILTER_MIN_MAG_LINEAR_MIP_POINT, " \ + "addressU = TEXTURE_ADDRESS_CLAMP, " \ + "addressV = TEXTURE_ADDRESS_CLAMP, " \ + "addressW = TEXTURE_ADDRESS_CLAMP, " \ + "comparisonFunc = COMPARISON_NEVER, " \ + "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )] + +#if defined(FFX_SPD_EMBED_ROOTSIG) +#define FFX_SPD_EMBED_ROOTSIG_CONTENT FFX_SPD_ROOTSIG +#else +#define FFX_SPD_EMBED_ROOTSIG_CONTENT +#endif // #if FFX_SPD_EMBED_ROOTSIG + +FfxUInt32 Mips() +{ + return mips; +} + +FfxUInt32 NumWorkGroups() +{ + return numWorkGroups; +} + +FfxUInt32x2 WorkGroupOffset() +{ + return workGroupOffset; +} + +FfxFloat32x2 InvInputSize() +{ + return invInputSize; +} + +SamplerState s_LinearClamp : register(s0); + + // SRVs + #if defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + Texture2DArray<FfxFloat32x4> r_input_downsample_src : FFX_SPD_DECLARE_SRV(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC); + #endif + + // UAV declarations + #if defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) + struct SpdGlobalAtomicBuffer { FfxUInt32 counter[6]; }; + globallycoherent RWStructuredBuffer<SpdGlobalAtomicBuffer> rw_internal_global_atomic : FFX_SPD_DECLARE_UAV(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC); + #endif + #if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + globallycoherent RWTexture2DArray<FfxFloat32x4> rw_input_downsample_src_mid_mip : FFX_SPD_DECLARE_UAV(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP); + #endif + #if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) +
RWTexture2DArray<FfxFloat32x4> rw_input_downsample_src_mips[SPD_MAX_MIP_LEVELS+1] : FFX_SPD_DECLARE_UAV(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS); + #endif + +#if FFX_HALF + +#if defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + FfxFloat16x4 SampleSrcImageH(FfxFloat32x2 uv, FfxUInt32 slice) + { + FfxFloat32x2 textureCoord = FfxFloat32x2(uv) * InvInputSize() + InvInputSize(); + FfxFloat32x4 result = r_input_downsample_src.SampleLevel(s_LinearClamp, FfxFloat32x3(textureCoord, slice), 0); + return FfxFloat16x4(ffxSrgbFromLinear(result.x), ffxSrgbFromLinear(result.y), ffxSrgbFromLinear(result.z), result.w); + } + #endif // defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + FfxFloat16x4 LoadSrcImageH(FfxFloat32x2 uv, FfxUInt32 slice) + { + return FfxFloat16x4(rw_input_downsample_src_mips[0][FfxUInt32x3(uv, slice)]); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + void StoreSrcMipH(FfxFloat16x4 value, FfxInt32x2 uv, FfxUInt32 slice, FfxUInt32 mip) + { + rw_input_downsample_src_mips[mip][FfxUInt32x3(uv, slice)] = FfxFloat32x4(value); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + FfxFloat16x4 LoadMidMipH(FfxInt32x2 uv, FfxUInt32 slice) + { + return FfxFloat16x4(rw_input_downsample_src_mid_mip[FfxUInt32x3(uv, slice)]); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + void StoreMidMipH(FfxFloat16x4 value, FfxInt32x2 uv, FfxUInt32 slice) + { + rw_input_downsample_src_mid_mip[FfxUInt32x3(uv, slice)] = FfxFloat32x4(value); + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#else // FFX_HALF + +#if defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + FfxFloat32x4 SampleSrcImage(FfxInt32x2 uv, FfxUInt32 slice) + { + FfxFloat32x2 textureCoord = FfxFloat32x2(uv) *
InvInputSize() + InvInputSize(); + FfxFloat32x4 result = r_input_downsample_src.SampleLevel(s_LinearClamp, FfxFloat32x3(textureCoord, slice), 0); + return FfxFloat32x4(ffxSrgbFromLinear(result.x), ffxSrgbFromLinear(result.y), ffxSrgbFromLinear(result.z), result.w); + } +#endif // defined(FFX_SPD_BIND_SRV_INPUT_DOWNSAMPLE_SRC) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + FfxFloat32x4 LoadSrcImage(FfxInt32x2 uv, FfxUInt32 slice) + { + return rw_input_downsample_src_mips[0][FfxUInt32x3(uv, slice)]; + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + void StoreSrcMip(FfxFloat32x4 value, FfxInt32x2 uv, FfxUInt32 slice, FfxUInt32 mip) + { + rw_input_downsample_src_mips[mip][FfxUInt32x3(uv, slice)] = value; + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MIPS) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + FfxFloat32x4 LoadMidMip(FfxInt32x2 uv, FfxUInt32 slice) + { + return rw_input_downsample_src_mid_mip[FfxUInt32x3(uv, slice)]; + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#if defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + void StoreMidMip(FfxFloat32x4 value, FfxInt32x2 uv, FfxUInt32 slice) + { + rw_input_downsample_src_mid_mip[FfxUInt32x3(uv, slice)] = value; + } +#endif // defined(FFX_SPD_BIND_UAV_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP) + +#endif // FFX_HALF + +#if defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) +void IncreaseAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice, FFX_PARAMETER_INOUT FfxUInt32 counter) +{ + InterlockedAdd(rw_internal_global_atomic[0].counter[slice], 1, counter); +} +#endif // defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) + +#if defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) +void ResetAtomicCounter(FFX_PARAMETER_IN FfxUInt32 slice) +{ + rw_internal_global_atomic[0].counter[slice] = 0; +} +#endif // defined(FFX_SPD_BIND_UAV_INTERNAL_GLOBAL_ATOMIC) + +#endif // #if 
defined(FFX_GPU) diff --git a/thirdparty/amd-ffx/gpu/spd/ffx_spd_downsample.h b/thirdparty/amd-ffx/gpu/spd/ffx_spd_downsample.h new file mode 100644 index 000000000000..146cc7dfcb87 --- /dev/null +++ b/thirdparty/amd-ffx/gpu/spd/ffx_spd_downsample.h @@ -0,0 +1,171 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#include "../ffx_core.h" + +#if FFX_HALF + #define FFX_SPD_PACKED_ONLY 1 +#endif // FFX_HALF + +#if FFX_SPD_OPTION_LINEAR_SAMPLE + #define SPD_LINEAR_SAMPLER 1 +#endif // FFX_SPD_OPTION_LINEAR_SAMPLE + +#if FFX_SPD_OPTION_WAVE_INTEROP_LDS + #define FFX_SPD_NO_WAVE_OPERATIONS 1 +#endif // FFX_SPD_OPTION_WAVE_INTEROP_LDS + +FFX_GROUPSHARED FfxUInt32 spdCounter; + +void SpdIncreaseAtomicCounter(FfxUInt32 slice) +{ + IncreaseAtomicCounter(slice, spdCounter); +} + +FfxUInt32 SpdGetAtomicCounter() +{ + return spdCounter; +} + +void SpdResetAtomicCounter(FfxUInt32 slice) +{ + ResetAtomicCounter(slice); +} + +#if FFX_HALF + +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16]; +FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16]; + +FfxFloat16x4 SpdLoadSourceImageH(FfxInt32x2 tex, FfxUInt32 slice) +{ +#if defined SPD_LINEAR_SAMPLER + return SampleSrcImageH(tex, slice); +#else + return LoadSrcImageH(tex, slice); +#endif // SPD_LINEAR_SAMPLER +} + +FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice) +{ + return LoadMidMipH(p, slice); +} + +void SpdStoreH(FfxInt32x2 pix, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice) +{ + if (mip == 5) + StoreMidMipH(value, pix, slice); + else + StoreSrcMipH(value, pix, slice, mip + 1); +} + +FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat16x4( + spdIntermediateRG[x][y].x, + spdIntermediateRG[x][y].y, + spdIntermediateBA[x][y].x, + spdIntermediateBA[x][y].y); +} + +void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value) +{ + spdIntermediateRG[x][y] = value.xy; + spdIntermediateBA[x][y] = value.zw; +} + +FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3) +{ +#if FFX_SPD_OPTION_DOWNSAMPLE_FILTER == 1 + return min(min(v0, v1), min(v2, v3)); +#elif FFX_SPD_OPTION_DOWNSAMPLE_FILTER == 2 + return max(max(v0, v1), max(v2, v3)); +#else + return (v0 + v1 + v2 + v3) * FfxFloat16(0.25); +#endif +} + +#else + +FFX_GROUPSHARED FfxFloat32 
spdIntermediateR[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16]; +FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16]; + +FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 tex, FfxUInt32 slice) +{ +#if defined SPD_LINEAR_SAMPLER + return SampleSrcImage(tex, slice); +#else + return LoadSrcImage(tex, slice); +#endif // SPD_LINEAR_SAMPLER +} + +FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice) +{ + return LoadMidMip(tex, slice); +} + +void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 mip, FfxUInt32 slice) +{ + if (mip == 5) + StoreMidMip(outValue, pix, slice); + else + StoreSrcMip(outValue, pix, slice, mip + 1); +} + +FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y) +{ + return FfxFloat32x4(spdIntermediateR[x][y], spdIntermediateG[x][y], spdIntermediateB[x][y], spdIntermediateA[x][y]); +} + +void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value) +{ + spdIntermediateR[x][y] = value.x; + spdIntermediateG[x][y] = value.y; + spdIntermediateB[x][y] = value.z; + spdIntermediateA[x][y] = value.w; +} + +FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3) +{ +#if FFX_SPD_OPTION_DOWNSAMPLE_FILTER == 1 + return ffxMin(ffxMin(v0, v1), ffxMin(v2, v3)); +#elif FFX_SPD_OPTION_DOWNSAMPLE_FILTER == 2 + return ffxMax(ffxMax(v0, v1), ffxMax(v2, v3)); +#else + return (v0 + v1 + v2 + v3) * 0.25; +#endif +} + +#endif // FFX_HALF + +#include "ffx_spd.h" + +void DOWNSAMPLE(FfxUInt32 LocalThreadId, FfxUInt32x3 WorkGroupId) +{ +#if FFX_HALF + SpdDownsampleH(WorkGroupId.xy, LocalThreadId, Mips(), NumWorkGroups(), WorkGroupId.z, WorkGroupOffset()); +#else + SpdDownsample(WorkGroupId.xy, LocalThreadId, Mips(), NumWorkGroups(), WorkGroupId.z, WorkGroupOffset()); +#endif // FFX_HALF +} diff --git a/thirdparty/amd-ffx/gpu/spd/ffx_spd_resources.h b/thirdparty/amd-ffx/gpu/spd/ffx_spd_resources.h new file mode 100644 index 000000000000..a346656aa4fa --- 
/dev/null +++ b/thirdparty/amd-ffx/gpu/spd/ffx_spd_resources.h @@ -0,0 +1,58 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef FFX_SPD_RESOURCES_H +#define FFX_SPD_RESOURCES_H + +#if defined(FFX_CPU) || defined(FFX_GPU) + +// Assumes a maximum of 12 mips. 
If larger base resolution support is added, +// extra mip resources need to also be added +#define SPD_MAX_MIP_LEVELS 12 + +#define FFX_SPD_RESOURCE_IDENTIFIER_NULL 0 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_GLOBAL_ATOMIC 1 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MID_MIPMAP 2 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC 3 // same as FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_0 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_0 3 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_1 4 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_2 5 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_3 6 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_4 7 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_5 8 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_6 9 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_7 10 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_8 11 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_9 12 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_10 13 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_11 14 +#define FFX_SPD_RESOURCE_IDENTIFIER_INPUT_DOWNSAMPLE_SRC_MIPMAP_12 15 +#define FFX_SPD_RESOURCE_IDENTIFIER_INTERNAL_GLOBAL_ATOMIC 16 + +#define FFX_SPD_RESOURCE_IDENTIFIER_COUNT 17 + +// CBV resource definitions +#define FFX_SPD_CONSTANTBUFFER_IDENTIFIER_SPD 0 + +#endif // #if defined(FFX_CPU) || defined(FFX_GPU) + +#endif // FFX_SPD_RESOURCES_H diff --git a/thirdparty/amd-fsr/license.txt b/thirdparty/amd-ffx/license.md similarity index 71% rename from thirdparty/amd-fsr/license.txt rename to thirdparty/amd-ffx/license.md index 324cba594d11..03d3b5379ef7 100644 --- a/thirdparty/amd-fsr/license.txt +++ b/thirdparty/amd-ffx/license.md @@ -1,18 +1,22 @@ -Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. 
+ + +# License + +Copyright © 2025 Advanced Micro Devices, Inc. Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal +of this software and associated documentation files(the "Software"), to deal in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +to use, copy, modify, merge, publish, distribute, sublicense, and /or sell copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: +furnished to do so, subject to the following conditions : The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid_pass.glsl new file mode 100644 index 000000000000..4299bcdeaeb1 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid_pass.glsl @@ -0,0 +1,75 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y 1 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS 2 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0 3 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1 4 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2 5 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3 6 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4 7 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5 8 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6 9 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7 10 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8 11 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9 12 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10 13 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11 14 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12 15 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 16 +#define FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID 17 + +#ifdef FFX_HALF + #undef FFX_HALF + #define FFX_HALF 0 +#endif + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_compute_game_vector_field_inpainting_pyramid.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 1 +#endif // 
#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeFrameinterpolationGameVectorFieldInpaintingPyramid(FfxInt32x3(gl_WorkGroupID), FfxInt32(gl_LocalInvocationIndex)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid_pass.glsl new file mode 100644 index 000000000000..880abe4aab4f --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid_pass.glsl @@ -0,0 +1,74 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OUTPUT 0 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS 1 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_0 2 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_1 3 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_2 4 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_3 5 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_4 6 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_5 7 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_6 8 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_7 9 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_8 10 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_9 11 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_10 12 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_11 13 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_INPAINTING_PYRAMID_MIPMAP_12 14 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 15 +#define FFX_FRAMEINTERPOLATION_BIND_CB_INPAINTING_PYRAMID 16 + +#ifdef FFX_HALF + #undef FFX_HALF + #define FFX_HALF 0 +#endif + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include 
"../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_compute_inpainting_pyramid.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeFrameinterpolationInpaintingPyramid(FfxInt32x3(gl_WorkGroupID), FfxInt32(gl_LocalInvocationIndex)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_debug_view_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_debug_view_pass.glsl new file mode 100644 index 000000000000..6d2aa65d3371 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_debug_view_pass.glsl @@ -0,0 +1,65 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_output declaration +#extension GL_EXT_shader_image_load_formatted : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y 3 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK 4 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER 5 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID 6 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE 7 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD 8 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT 9 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 10 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_debug_view.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef 
FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeDebugView(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_disocclusion_mask_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_disocclusion_mask_pass.glsl new file mode 100644 index 000000000000..7661c6a7a285 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_disocclusion_mask_pass.glsl @@ -0,0 +1,61 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH 3 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME 4 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID 5 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD 6 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK 7 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 8 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_disocclusion_mask.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeDisocclusionMask(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git 
a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field_pass.glsl new file mode 100644 index 000000000000..15c082fef2be --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field_pass.glsl @@ -0,0 +1,60 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE 3 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD 4 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X 5 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y 6 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 7 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_game_motion_vector_field.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeGameFieldMvs(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_inpainting_pass.glsl 
b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_inpainting_pass.glsl new file mode 100644 index 000000000000..d034d6e80029 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_inpainting_pass.glsl @@ -0,0 +1,60 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_output declaration +#extension GL_EXT_shader_image_load_formatted : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_PRESENT_BACKBUFFER 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE 3 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT 7 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 8 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_inpainting.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeInpainting(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field_pass.glsl 
b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field_pass.glsl new file mode 100644 index 000000000000..f9cf49ee1e43 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field_pass.glsl @@ -0,0 +1,60 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_CONFIDENCE 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE 3 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE 4 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X 6 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y 7 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 8 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_optical_flow_vector_field.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeOpticalFlowVectorField(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_pass.glsl 
b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_pass.glsl new file mode 100644 index 000000000000..b1a794832c54 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_pass.glsl @@ -0,0 +1,65 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_output declaration +#extension GL_EXT_shader_image_load_formatted : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_X 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_GAME_MOTION_VECTOR_FIELD_Y 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y 3 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_PREVIOUS_INTERPOLATION_SOURCE 4 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE 5 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DISOCCLUSION_MASK 6 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_INPAINTING_PYRAMID 7 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_COUNTERS 8 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OUTPUT 9 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 10 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef 
FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + computeFrameinterpolation(ivec2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_reconstruct_and_dilate_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_reconstruct_and_dilate_pass.glsl new file mode 100644 index 000000000000..8e9d5a9eeb2e --- /dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_reconstruct_and_dilate_pass.glsl @@ -0,0 +1,58 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_INPUT_DEPTH 1 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_PREVIOUS_FRAME 2 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_MOTION_VECTORS 3 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_DILATED_DEPTH 4 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 5 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_dilated_velocity_and_previous_depth.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + ReconstructAndDilate(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth_pass.glsl new file mode 100644 index 000000000000..9fe1681bc2c0 --- 
/dev/null +++ b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth_pass.glsl @@ -0,0 +1,58 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_MOTION_VECTORS 0 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DILATED_DEPTH 1 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_CURRENT_INTERPOLATION_SOURCE 2 +#define FFX_FRAMEINTERPOLATION_BIND_SRV_DISTORTION_FIELD 3 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_RECONSTRUCTED_DEPTH_INTERPOLATED_FRAME 4 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 5 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_reconstruct_previous_depth.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + reconstructPreviousDepth(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_setup_pass.glsl b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_setup_pass.glsl new file mode 100644 index 000000000000..32168a33f888 --- /dev/null +++ 
b/thirdparty/amd-ffx/shaders/frameinterpolation/ffx_frameinterpolation_setup_pass.glsl @@ -0,0 +1,60 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FFX_FRAMEINTERPOLATION_BIND_SRV_OPTICAL_FLOW_SCENE_CHANGE_DETECTION 0 + +#define FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_X 1 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_GAME_MOTION_VECTOR_FIELD_Y 2 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_X 3 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_OPTICAL_FLOW_MOTION_VECTOR_FIELD_Y 4 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_DISOCCLUSION_MASK 5 +#define FFX_FRAMEINTERPOLATION_BIND_UAV_COUNTERS 6 + +#define FFX_FRAMEINTERPOLATION_BIND_CB_FRAMEINTERPOLATION 7 + +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_callbacks_glsl.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_common.h" +#include "../../gpu/frameinterpolation/ffx_frameinterpolation_setup.h" + +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT +#ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#define FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH +#ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS +#define FFX_FRAMEINTERPOLATION_NUM_THREADS layout (local_size_x = FFX_FRAMEINTERPOLATION_THREAD_GROUP_WIDTH, local_size_y = FFX_FRAMEINTERPOLATION_THREAD_GROUP_HEIGHT, local_size_z = FFX_FRAMEINTERPOLATION_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FRAMEINTERPOLATION_NUM_THREADS + +FFX_FRAMEINTERPOLATION_NUM_THREADS +void main() +{ + setupFrameinterpolationResources(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_easu_pass.glsl b/thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_easu_pass.glsl 
new file mode 100644 index 000000000000..af27934f4f5b --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_easu_pass.glsl @@ -0,0 +1,63 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR1 pass 1 +// SRV 0 : FSR1_InputColor : r_input_color +// UAV 0 : FSR1_InternalUpscaled : rw_internal_upscaled_color (if doing RCAS) +// UAV 1 : FSR1_UpscaledOutput : rw_upscaled_output (if not) +// CB 0 : cbFSR1 + +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR1_BIND_SRV_INPUT_COLOR 0 +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+#define FSR1_BIND_UAV_INTERNAL_UPSCALED_COLOR 1 //2000 +#define FSR1_BIND_UAV_UPSCALED_OUTPUT 2 //2001 + +#define FSR1_BIND_CB_FSR1 3 //3000 +// GODOT ENDS + +#include "../../gpu/fsr1/ffx_fsr1_callbacks_glsl.h" +#include "../../gpu/fsr1/ffx_fsr1_easu.h" + +#ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#define FFX_FSR1_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#ifndef FFX_FSR1_THREAD_GROUP_HEIGHT +#define FFX_FSR1_THREAD_GROUP_HEIGHT 1 +#endif // FFX_FSR1_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#define FFX_FSR1_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#ifndef FFX_FSR1_NUM_THREADS +#define FFX_FSR1_NUM_THREADS layout (local_size_x = FFX_FSR1_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR1_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR1_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR1_NUM_THREADS + +FFX_FSR1_NUM_THREADS +void main() +{ + EASU(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz); +} diff --git a/thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_rcas_pass.glsl b/thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_rcas_pass.glsl new file mode 100644 index 000000000000..3047caf9fe73 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr1/ffx_fsr1_rcas_pass.glsl @@ -0,0 +1,61 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +// FSR1 pass 2 (optional RCAS) +// SRV 0 : FSR1_InternalUpscaled : r_internal_upscaled_color +// UAV 0 : FSR1_UpscaledOutput : rw_upscaled_output +// CB 0 : cbFSR1 + +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR1_BIND_SRV_INTERNAL_UPSCALED_COLOR 0 +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. +#define FSR1_BIND_UAV_UPSCALED_OUTPUT 1 //2000 + +#define FSR1_BIND_CB_FSR1 2 //3000 +// GODOT ENDS + +#include "../../gpu/fsr1/ffx_fsr1_callbacks_glsl.h" +#include "../../gpu/fsr1/ffx_fsr1_rcas.h" + +#ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#define FFX_FSR1_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_WIDTH +#ifndef FFX_FSR1_THREAD_GROUP_HEIGHT +#define FFX_FSR1_THREAD_GROUP_HEIGHT 1 +#endif // FFX_FSR1_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#define FFX_FSR1_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR1_THREAD_GROUP_DEPTH +#ifndef FFX_FSR1_NUM_THREADS +#define FFX_FSR1_NUM_THREADS layout (local_size_x = FFX_FSR1_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR1_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR1_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR1_NUM_THREADS + +FFX_FSR1_NUM_THREADS +void main() +{ + RCAS(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz); +} diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl 
b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_accumulate_pass.glsl similarity index 73% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_accumulate_pass.glsl index 6b67691ec7c4..5a82771c2ab7 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_accumulate_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// @@ -35,7 +36,10 @@ #endif #define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 #define FSR2_BIND_SRV_LOCK_STATUS 4 +// GODOT BEGINS +// See `fsr2.cpp` for reason of commenting this out //#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5 +// GODOT ENDS #define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 #define FSR2_BIND_SRV_LUMA_INSTABILITY 7 #define FSR2_BIND_SRV_LANCZOS_LUT 8 @@ -44,25 +48,28 @@ #define FSR2_BIND_SRV_AUTO_EXPOSURE 11 #define FSR2_BIND_SRV_LUMA_HISTORY 12 -#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13 -#define FSR2_BIND_UAV_LOCK_STATUS 14 -#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15 -#define FSR2_BIND_UAV_NEW_LOCKS 16 -#define FSR2_BIND_UAV_LUMA_HISTORY 17 - -#define FSR2_BIND_CB_FSR2 18 - #if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS #define FSR2_BIND_SRV_INPUT_DEPTH 5 #endif -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" -#include "ffx_fsr2_sample.h" -#include "ffx_fsr2_upsample.h" -#include "ffx_fsr2_postprocess_lock_status.h" -#include "ffx_fsr2_reproject.h" -#include "ffx_fsr2_accumulate.h" +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+#define FSR2_BIND_UAV_INTERNAL_UPSCALED 13 //2013 +#define FSR2_BIND_UAV_LOCK_STATUS 14 //2014 +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 15 //2015 +#define FSR2_BIND_UAV_NEW_LOCKS 16 //2016 +#define FSR2_BIND_UAV_LUMA_HISTORY 17 //2017 + +#define FSR2_BIND_CB_FSR2 18 //3000 +// GODOT ENDS + +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_sample.h" +#include "../../gpu/fsr2/ffx_fsr2_upsample.h" +#include "../../gpu/fsr2/ffx_fsr2_postprocess_lock_status.h" +#include "../../gpu/fsr2/ffx_fsr2_reproject.h" +#include "../../gpu/fsr2/ffx_fsr2_accumulate.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 @@ -88,4 +95,4 @@ void main() uvec2 uDispatchThreadId = uGroupId * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy; Accumulate(ivec2(uDispatchThreadId)); -} \ No newline at end of file +} diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_autogen_reactive_pass.glsl similarity index 60% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_autogen_reactive_pass.glsl index 3b86c17d4da9..fadffb0b274d 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_autogen_reactive_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -26,17 +27,17 @@ #define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY 0 #define FSR2_BIND_SRV_INPUT_COLOR 1 -#define FSR2_BIND_UAV_AUTOREACTIVE 2 -#define FSR2_BIND_CB_REACTIVE 3 -#define FSR2_BIND_CB_FSR2 4 -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+#define FSR2_BIND_UAV_AUTOREACTIVE 2 //2002 -// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR) uniform texture2D r_input_color_pre_alpha; -// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D r_input_color_post_alpha; -// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8) uniform image2D rw_output_reactive_mask; +#define FSR2_BIND_CB_REACTIVE 3 //3000 +#define FSR2_BIND_CB_FSR2 4 //3001 +// GODOT ENDS +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 @@ -51,16 +52,6 @@ #define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; #endif // #ifndef FFX_FSR2_NUM_THREADS -#if defined(FSR2_BIND_CB_REACTIVE) -layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t -{ - float scale; - float threshold; - float binaryValue; - uint flags; -} cbGenerateReactive; -#endif - FFX_FSR2_NUM_THREADS void main() { @@ -68,14 +59,14 @@ void main() FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb; FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb; - - if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0) + + if ((GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0) { ColorPreAlpha = Tonemap(ColorPreAlpha); ColorPostAlpha = Tonemap(ColorPostAlpha); } - if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0) + if ((GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0) { ColorPreAlpha = InverseTonemap(ColorPreAlpha); ColorPostAlpha = InverseTonemap(ColorPostAlpha); @@ -83,11 +74,11 @@ void main() FfxFloat32 out_reactive_value = 0.f; FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha); - - out_reactive_value = 
((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta); - out_reactive_value *= cbGenerateReactive.scale; - out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 0 : cbGenerateReactive.binaryValue) : out_reactive_value; + out_reactive_value = ((GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta); + out_reactive_value *= GenReactiveScale(); + + out_reactive_value = ((GenReactiveFlags() & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < GenReactiveThreshold()) ? 0 : GenReactiveBinaryValue()) : out_reactive_value; imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value)); } diff --git a/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid_pass.glsl new file mode 100644 index 000000000000..c28b70ba0a42 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_compute_luminance_pyramid_pass.glsl @@ -0,0 +1,62 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR2_BIND_SRV_INPUT_COLOR 0 + +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. +#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 //2001 +#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 //2002 +#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3 //2003 +#define FSR2_BIND_UAV_AUTO_EXPOSURE 4 //2004 + +#define FSR2_BIND_CB_FSR2 5 //3000 +#define FSR2_BIND_CB_SPD 6 //3001 +// GODOT ENDS + +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_compute_luminance_pyramid.h" + +#ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#define FFX_FSR2_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH +#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#define FFX_FSR2_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH +#ifndef FFX_FSR2_NUM_THREADS +#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR2_NUM_THREADS + +FFX_FSR2_NUM_THREADS +void main() +{ + ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex); +} diff --git 
a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_depth_clip_pass.glsl similarity index 70% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_depth_clip_pass.glsl index 45ec5bdb8674..cbdfe337e936 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_depth_clip_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// @@ -29,22 +30,29 @@ #define FSR2_BIND_SRV_DILATED_DEPTH 2 #define FSR2_BIND_SRV_REACTIVE_MASK 3 #define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +// GODOT BEGINS +// Godot render graph forces one resource to serve only one usage so we have to remove this binding +//#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 5 +// GODOT ENDS #define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS 6 #define FSR2_BIND_SRV_INPUT_MOTION_VECTORS 7 #define FSR2_BIND_SRV_INPUT_COLOR 8 #define FSR2_BIND_SRV_INPUT_DEPTH 9 #define FSR2_BIND_SRV_INPUT_EXPOSURE 10 -#define FSR2_BIND_UAV_DEPTH_CLIP 11 -#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12 -#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13 +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. +#define FSR2_BIND_UAV_DEPTH_CLIP 11 //2011 +#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS 12 //2012 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 13 //2013 -#define FSR2_BIND_CB_FSR2 14 +#define FSR2_BIND_CB_FSR2 14 //3000 +// GODOT ENDS -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" -#include "ffx_fsr2_sample.h" -#include "ffx_fsr2_depth_clip.h" +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_sample.h" +#include "../../gpu/fsr2/ffx_fsr2_depth_clip.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_lock_pass.glsl similarity index 73% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_lock_pass.glsl index 7c3a4c27400c..7570665e21a2 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_lock_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. 
All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -25,14 +26,19 @@ #extension GL_EXT_samplerless_texture_functions : require #define FSR2_BIND_SRV_LOCK_INPUT_LUMA 0 -#define FSR2_BIND_UAV_NEW_LOCKS 1 -#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 -#define FSR2_BIND_CB_FSR2 3 - -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" -#include "ffx_fsr2_sample.h" -#include "ffx_fsr2_lock.h" + +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+#define FSR2_BIND_UAV_NEW_LOCKS 1 //2001 +#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 2 //2002 + +#define FSR2_BIND_CB_FSR2 3 //3000 +// GODOT ENDS + +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_sample.h" +#include "../../gpu/fsr2/ffx_fsr2_lock.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_rcas_pass.glsl similarity index 69% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_rcas_pass.glsl index 668279be5115..f72d21bb1176 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_rcas_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. 
// @@ -28,37 +29,18 @@ #define FSR2_BIND_SRV_INPUT_EXPOSURE 0 #define FSR2_BIND_SRV_RCAS_INPUT 1 -#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 -#define FSR2_BIND_CB_FSR2 3 -#define FSR2_BIND_CB_RCAS 4 - -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" - -//Move to prototype shader! -#if defined(FSR2_BIND_CB_RCAS) - layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t - { - uvec4 rcasConfig; - } cbRCAS; - uvec4 RCASConfig() - { - return cbRCAS.rcasConfig; - } -#else - uvec4 RCASConfig() - { - return uvec4(0); - } -#endif +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. +#define FSR2_BIND_UAV_UPSCALED_OUTPUT 2 //2002 -vec4 LoadRCAS_Input(FfxInt32x2 iPxPos) -{ - return texelFetch(r_rcas_input, iPxPos, 0); -} +#define FSR2_BIND_CB_FSR2 3 //3000 +#define FSR2_BIND_CB_RCAS 4 //3001 +// GODOT ENDS -#include "ffx_fsr2_rcas.h" +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_rcas.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 64 @@ -77,4 +59,4 @@ FFX_FSR2_NUM_THREADS void main() { RCAS(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz); -} \ No newline at end of file +} diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_reconstruct_previous_depth_pass.glsl similarity index 65% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_reconstruct_previous_depth_pass.glsl index be4395aaed74..6bda99ec1710 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_reconstruct_previous_depth_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. 
All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -30,20 +31,23 @@ #define FSR2_BIND_SRV_INPUT_EXPOSURE 3 #define FSR2_BIND_SRV_LUMA_HISTORY 4 -#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5 -#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6 -#define FSR2_BIND_UAV_DILATED_DEPTH 7 -#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8 -#define FSR2_BIND_UAV_LUMA_HISTORY 9 -#define FSR2_BIND_UAV_LUMA_INSTABILITY 10 -#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11 +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5 //2005 +#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS 6 //2006 +#define FSR2_BIND_UAV_DILATED_DEPTH 7 //2007 +#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR 8 //2008 +#define FSR2_BIND_UAV_LUMA_HISTORY 9 //2009 +#define FSR2_BIND_UAV_LUMA_INSTABILITY 10 //2010 +#define FSR2_BIND_UAV_LOCK_INPUT_LUMA 11 //2011 -#define FSR2_BIND_CB_FSR2 12 +#define FSR2_BIND_CB_FSR2 12 //3000 +// GODOT ENDS -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" -#include "ffx_fsr2_sample.h" -#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_sample.h" +#include "../../gpu/fsr2/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_tcr_autogen_pass.glsl similarity index 71% rename from thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl rename to thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_tcr_autogen_pass.glsl index 5c042c332afb..5df408481c2e 100644 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl +++ b/thirdparty/amd-ffx/shaders/fsr2/ffx_fsr2_tcr_autogen_pass.glsl @@ -1,13 +1,14 @@ // This file is part of the FidelityFX SDK. // -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
// // Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal +// of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell // copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: +// furnished to do so, subject to the following conditions : +// // The above copyright notice and this permission notice shall be included in // all copies or substantial portions of the Software. // @@ -32,43 +33,26 @@ #define FSR2_BIND_SRV_REACTIVE_MASK 5 #define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 6 -#define FSR2_BIND_UAV_AUTOREACTIVE 7 -#define FSR2_BIND_UAV_AUTOCOMPOSITION 8 -#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9 -#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10 - -#define FSR2_BIND_CB_FSR2 11 -#define FSR2_BIND_CB_REACTIVE 12 - +// GODOT BEGINS #if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS #define FSR2_BIND_SRV_INPUT_DEPTH 13 #endif +// GODOT ENDS -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" +// GODOT BEGINS +// Godot DX12 backend doesn't support binding numbers larger than 1000, so we have to remap them. 
+#define FSR2_BIND_UAV_AUTOREACTIVE 7 //2007 +#define FSR2_BIND_UAV_AUTOCOMPOSITION 8 //2008 +#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR 9 //2009 +#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR 10 //2010 -#ifdef FSR2_BIND_CB_REACTIVE -layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t -{ - float fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels - float fTcScale; - float fReactiveScale; - float fReactiveMax; -} cbGenerateReactive; +#define FSR2_BIND_CB_FSR2 11 //3000 +#define FSR2_BIND_CB_AUTOREACTIVE 12 //3001 -float getTcThreshold() -{ - return cbGenerateReactive.fTcThreshold; -} - -#else - float getTcThreshold() - { - return 0.05f; - } -#endif -#include "ffx_fsr2_tcr_autogen.h" +#include "../../gpu/fsr2/ffx_fsr2_callbacks_glsl.h" +#include "../../gpu/fsr2/ffx_fsr2_common.h" +#include "../../gpu/fsr2/ffx_fsr2_tcr_autogen.h" #ifndef FFX_FSR2_THREAD_GROUP_WIDTH #define FFX_FSR2_THREAD_GROUP_WIDTH 8 @@ -98,17 +82,17 @@ void main() FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) ); FFX_MIN16_F2 outReactiveMask = FFX_MIN16_F2( 0.f, 0.f ); - + outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx); if (outReactiveMask.y > 0.5f) { outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx); - outReactiveMask.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale); - outReactiveMask.x = outReactiveMask.x < cbGenerateReactive.fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax ); + outReactiveMask.x *= FFX_MIN16_F(ReactiveScale()); + outReactiveMask.x = outReactiveMask.x < ReactiveMax() ? 
outReactiveMask.x : FFX_MIN16_F( ReactiveMax() ); } - outReactiveMask.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale); + outReactiveMask.y *= FFX_MIN16_F(TcScale()); outReactiveMask.x = ffxMax(outReactiveMask.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId))); outReactiveMask.y = ffxMax(outReactiveMask.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId))); diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate_pass.glsl new file mode 100644 index 000000000000..eb355cd33954 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_accumulate_pass.glsl @@ -0,0 +1,75 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_upscaled_output declaration +#extension GL_EXT_shader_image_load_formatted : require + +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#if FFX_FSR3UPSCALER_OPTION_LOW_RESOLUTION_MOTION_VECTORS +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#else +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 2 +#endif +#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR3UPSCALER_BIND_SRV_LANCZOS_LUT 4 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1 5 + +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 6 +#define FSR3UPSCALER_BIND_SRV_LUMA_INSTABILITY 7 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 8 + +#define FSR3UPSCALER_BIND_UAV_INTERNAL_UPSCALED 9 +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 10 +#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 11 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 12 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_sample.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_upsample.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_reproject.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_accumulate.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = 
FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + Accumulate(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_autogen_reactive_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_autogen_reactive_pass.glsl new file mode 100644 index 000000000000..9bec3a1466c1 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_autogen_reactive_pass.glsl @@ -0,0 +1,84 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_INPUT_OPAQUE_ONLY 0 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 1 + +#define FSR3UPSCALER_BIND_UAV_AUTOREACTIVE 2 +#define FSR3UPSCALER_BIND_UAV_AUTOCOMPOSITION 3 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 4 +#define FSR3UPSCALER_BIND_CB_REACTIVE 5 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + FfxUInt32x2 uGroupId = gl_WorkGroupID.xy; + FfxUInt32x2 uGroupThreadId = gl_LocalInvocationID.xy; + FfxUInt32x2 uDispatchThreadId = uGroupId * FfxUInt32x2(FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT) + uGroupThreadId; + + FfxFloat32x3 ColorPreAlpha = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb; + FfxFloat32x3 ColorPostAlpha = LoadInputColor(FfxInt32x2(uDispatchThreadId)).rgb; + + if ((GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0) + { + ColorPreAlpha = Tonemap(ColorPreAlpha); + ColorPostAlpha = Tonemap(ColorPostAlpha); + } + + if ((GenReactiveFlags() & 
FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0) + { + ColorPreAlpha = InverseTonemap(ColorPreAlpha); + ColorPostAlpha = InverseTonemap(ColorPostAlpha); + } + + FfxFloat32 out_reactive_value = 0.f; + FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha); + + out_reactive_value = ((GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta); + out_reactive_value *= GenReactiveScale(); + + out_reactive_value = ((GenReactiveFlags() & FFX_FSR3UPSCALER_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < GenReactiveThreshold()) ? 0 : GenReactiveBinaryValue()) : out_reactive_value; + + imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), FfxFloat32x4(out_reactive_value, 0.0, 0.0, 0.0)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view_pass.glsl new file mode 100644 index 000000000000..00b4c530a562 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_debug_view_pass.glsl @@ -0,0 +1,59 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 +#define FSR3UPSCALER_BIND_SRV_INTERNAL_UPSCALED 3 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 4 + +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 5 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 6 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_debug_view.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + 
DebugView(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability_pass.glsl new file mode 100644 index 000000000000..da1238d2eba4 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_instability_pass.glsl @@ -0,0 +1,62 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_REACTIVE_MASKS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_FRAME_INFO 3 +#define FSR3UPSCALER_BIND_SRV_LUMA_HISTORY 4 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH_MIP1 5 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 6 + +#define FSR3UPSCALER_BIND_UAV_LUMA_HISTORY 7 +#define FSR3UPSCALER_BIND_UAV_LUMA_INSTABILITY 8 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 9 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_luma_instability.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + LumaInstability(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid_pass.glsl new file mode 100644 index 000000000000..0eac11297a89 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid_pass.glsl @@ 
-0,0 +1,65 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 0 +#define FSR3UPSCALER_BIND_SRV_FARTHEST_DEPTH 1 + +#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 2 +#define FSR3UPSCALER_BIND_UAV_FRAME_INFO 3 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0 4 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1 5 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 6 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 7 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 8 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 9 +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH_MIP1 10 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 11 +#define FSR3UPSCALER_BIND_CB_SPD 12 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_luma_pyramid.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + ComputeAutoExposure(gl_WorkGroupID, gl_LocalInvocationIndex); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs_pass.glsl new 
file mode 100644 index 000000000000..59ce2a84b373 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs_pass.glsl @@ -0,0 +1,61 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_INPUT_MOTION_VECTORS 0 +#define FSR3UPSCALER_BIND_SRV_INPUT_DEPTH 1 +#define FSR3UPSCALER_BIND_SRV_INPUT_COLOR 2 + +#define FSR3UPSCALER_BIND_UAV_DILATED_MOTION_VECTORS 3 +#define FSR3UPSCALER_BIND_UAV_DILATED_DEPTH 4 +#define FSR3UPSCALER_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH 5 +#define FSR3UPSCALER_BIND_UAV_FARTHEST_DEPTH 6 +#define FSR3UPSCALER_BIND_UAV_CURRENT_LUMA 7 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 8 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_inputs.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + PrepareInputs(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity_pass.glsl new file mode 100644 index 000000000000..4250abbead29 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity_pass.glsl @@ -0,0 +1,65 @@ 
+// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH 0 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_DEPTH 2 +#define FSR3UPSCALER_BIND_SRV_REACTIVE_MASK 3 +#define FSR3UPSCALER_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK 4 +#define FSR3UPSCALER_BIND_SRV_ACCUMULATION 5 +#define FSR3UPSCALER_BIND_SRV_SHADING_CHANGE 6 +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 7 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 8 + +#define FSR3UPSCALER_BIND_UAV_DILATED_REACTIVE_MASKS 9 +#define FSR3UPSCALER_BIND_UAV_NEW_LOCKS 10 +#define FSR3UPSCALER_BIND_UAV_ACCUMULATION 11 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 12 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_prepare_reactivity.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + PrepareReactivity(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas_pass.glsl 
b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas_pass.glsl new file mode 100644 index 000000000000..f1824203460f --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_rcas_pass.glsl @@ -0,0 +1,59 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require +// Needed for rw_upscaled_output declaration +#extension GL_EXT_shader_image_load_formatted : require + +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 0 +#define FSR3UPSCALER_BIND_SRV_RCAS_INPUT 1 + +#define FSR3UPSCALER_BIND_UAV_UPSCALED_OUTPUT 2 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 3 +#define FSR3UPSCALER_BIND_CB_RCAS 4 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 64 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + RCAS(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pass.glsl new file mode 100644 index 000000000000..691a055ac4aa --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pass.glsl @@ -0,0 +1,55 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_SPD_MIPS 0 + +#define FSR3UPSCALER_BIND_UAV_SHADING_CHANGE 1 + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 2 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 8 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + ShadingChange(FfxInt32x2(gl_GlobalInvocationID.xy)); +} diff --git a/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid_pass.glsl b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid_pass.glsl new file mode 100644 index 000000000000..8180a1350f29 --- /dev/null +++ b/thirdparty/amd-ffx/shaders/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid_pass.glsl @@ -0,0 +1,67 @@ +// This file is part of the FidelityFX SDK. +// +// Copyright (C) 2024 Advanced Micro Devices, Inc. 
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and /or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +//#version 450 + +#extension GL_GOOGLE_include_directive : require +#extension GL_EXT_samplerless_texture_functions : require + +#define FSR3UPSCALER_BIND_SRV_CURRENT_LUMA 0 +#define FSR3UPSCALER_BIND_SRV_PREVIOUS_LUMA 1 +#define FSR3UPSCALER_BIND_SRV_DILATED_MOTION_VECTORS 2 +#define FSR3UPSCALER_BIND_SRV_INPUT_EXPOSURE 3 + + +#define FSR3UPSCALER_BIND_UAV_SPD_GLOBAL_ATOMIC 4 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_0 5 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_1 6 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_2 7 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_3 8 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_4 9 +#define FSR3UPSCALER_BIND_UAV_SPD_MIPS_LEVEL_5 10 + + +#define FSR3UPSCALER_BIND_CB_FSR3UPSCALER 11 +#define FSR3UPSCALER_BIND_CB_SPD 12 + +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_callbacks_glsl.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_common.h" +#include "../../gpu/fsr3upscaler/ffx_fsr3upscaler_shading_change_pyramid.h" + +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH 256 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#define FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT +#ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#define FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH 1 +#endif // #ifndef FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH +#ifndef FFX_FSR3UPSCALER_NUM_THREADS +#define FFX_FSR3UPSCALER_NUM_THREADS layout (local_size_x = FFX_FSR3UPSCALER_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR3UPSCALER_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR3UPSCALER_THREAD_GROUP_DEPTH) in; +#endif // #ifndef FFX_FSR3UPSCALER_NUM_THREADS + +FFX_FSR3UPSCALER_NUM_THREADS +void main() +{ + ComputeShadingChangePyramid(gl_WorkGroupID, gl_LocalInvocationIndex); +} diff --git a/thirdparty/amd-fsr/ffx_a.h b/thirdparty/amd-fsr/ffx_a.h deleted file mode 100644 index d04bff55cbe5..000000000000 --- 
a/thirdparty/amd-fsr/ffx_a.h +++ /dev/null @@ -1,2656 +0,0 @@ -//============================================================================================================================== -// -// [A] SHADER PORTABILITY 1.20210629 -// -//============================================================================================================================== -// FidelityFX Super Resolution Sample -// -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -// MIT LICENSE -// =========== -// Copyright (c) 2014 Michal Drobot (for concepts used in "FLOAT APPROXIMATIONS"). 
-// ----------- -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// ----------- -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the -// Software. -// ----------- -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -// ABOUT -// ===== -// Common central point for high-level shading language and C portability for various shader headers. -//------------------------------------------------------------------------------------------------------------------------------ -// DEFINES -// ======= -// A_CPU ..... Include the CPU related code. -// A_GPU ..... Include the GPU related code. -// A_GLSL .... Using GLSL. -// A_HLSL .... Using HLSL. -// A_HLSL_6_2 Using HLSL 6.2 with new 'uint16_t' and related types (requires '-enable-16bit-types'). -// A_NO_16_BIT_CAST Don't use instructions that are not availabe in SPIR-V (needed for running A_HLSL_6_2 on Vulkan) -// A_GCC ..... Using a GCC compatible compiler (else assume MSVC compatible compiler by default). 
-// ======= -// A_BYTE .... Support 8-bit integer. -// A_HALF .... Support 16-bit integer and floating point. -// A_LONG .... Support 64-bit integer. -// A_DUBL .... Support 64-bit floating point. -// ======= -// A_WAVE .... Support wave-wide operations. -//------------------------------------------------------------------------------------------------------------------------------ -// To get #include "ffx_a.h" working in GLSL use '#extension GL_GOOGLE_include_directive:require'. -//------------------------------------------------------------------------------------------------------------------------------ -// SIMPLIFIED TYPE SYSTEM -// ====================== -// - All ints will be unsigned with exception of when signed is required. -// - Type naming simplified and shortened "A<#components>", -// - H = 16-bit float (half) -// - F = 32-bit float (float) -// - D = 64-bit float (double) -// - P = 1-bit integer (predicate, not using bool because 'B' is used for byte) -// - B = 8-bit integer (byte) -// - W = 16-bit integer (word) -// - U = 32-bit integer (unsigned) -// - L = 64-bit integer (long) -// - Using "AS<#components>" for signed when required. -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Make sure 'ALerp*(a,b,m)' does 'b*m+(-a*m+a)' (2 ops). -//------------------------------------------------------------------------------------------------------------------------------ -// CHANGE LOG -// ========== -// 20200914 - Expanded wave ops and prx code. -// 20200713 - Added [ZOL] section, fixed serious bugs in sRGB and Rec.709 color conversion code, etc. 
-//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// COMMON -//============================================================================================================================== -#define A_2PI 6.28318530718 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// CPU -// -// 
-//============================================================================================================================== -#ifdef A_CPU - // Supporting user defined overrides. - #ifndef A_RESTRICT - #define A_RESTRICT __restrict - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifndef A_STATIC - #define A_STATIC static - #endif -//------------------------------------------------------------------------------------------------------------------------------ - // Same types across CPU and GPU. - // Predicate uses 32-bit integer (C friendly bool). - typedef uint32_t AP1; - typedef float AF1; - typedef double AD1; - typedef uint8_t AB1; - typedef uint16_t AW1; - typedef uint32_t AU1; - typedef uint64_t AL1; - typedef int8_t ASB1; - typedef int16_t ASW1; - typedef int32_t ASU1; - typedef int64_t ASL1; -//------------------------------------------------------------------------------------------------------------------------------ - #define AD1_(a) ((AD1)(a)) - #define AF1_(a) ((AF1)(a)) - #define AL1_(a) ((AL1)(a)) - #define AU1_(a) ((AU1)(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASL1_(a) ((ASL1)(a)) - #define ASU1_(a) ((ASU1)(a)) -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AU1 AU1_AF1(AF1 a){union{AF1 f;AU1 u;}bits;bits.f=a;return bits.u;} -//------------------------------------------------------------------------------------------------------------------------------ - #define A_TRUE 1 - #define A_FALSE 0 -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// CPU/GPU PORTING -// -//------------------------------------------------------------------------------------------------------------------------------ -// Get CPU and GPU to share all setup code, without duplicate code paths. -// This uses a lower-case prefix for special vector constructs. -// - In C restrict pointers are used. -// - In the shading language, in/inout/out arguments are used. -// This depends on the ability to access a vector value in both languages via array syntax (aka color[2]). 
-//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY -//============================================================================================================================== - #define retAD2 AD1 *A_RESTRICT - #define retAD3 AD1 *A_RESTRICT - #define retAD4 AD1 *A_RESTRICT - #define retAF2 AF1 *A_RESTRICT - #define retAF3 AF1 *A_RESTRICT - #define retAF4 AF1 *A_RESTRICT - #define retAL2 AL1 *A_RESTRICT - #define retAL3 AL1 *A_RESTRICT - #define retAL4 AL1 *A_RESTRICT - #define retAU2 AU1 *A_RESTRICT - #define retAU3 AU1 *A_RESTRICT - #define retAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define inAD2 AD1 *A_RESTRICT - #define inAD3 AD1 *A_RESTRICT - #define inAD4 AD1 *A_RESTRICT - #define inAF2 AF1 *A_RESTRICT - #define inAF3 AF1 *A_RESTRICT - #define inAF4 AF1 *A_RESTRICT - #define inAL2 AL1 *A_RESTRICT - #define inAL3 AL1 *A_RESTRICT - #define inAL4 AL1 *A_RESTRICT - #define inAU2 AU1 *A_RESTRICT - #define inAU3 AU1 *A_RESTRICT - #define inAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define inoutAD2 AD1 *A_RESTRICT - #define inoutAD3 AD1 *A_RESTRICT - #define inoutAD4 AD1 *A_RESTRICT - #define inoutAF2 
AF1 *A_RESTRICT - #define inoutAF3 AF1 *A_RESTRICT - #define inoutAF4 AF1 *A_RESTRICT - #define inoutAL2 AL1 *A_RESTRICT - #define inoutAL3 AL1 *A_RESTRICT - #define inoutAL4 AL1 *A_RESTRICT - #define inoutAU2 AU1 *A_RESTRICT - #define inoutAU3 AU1 *A_RESTRICT - #define inoutAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define outAD2 AD1 *A_RESTRICT - #define outAD3 AD1 *A_RESTRICT - #define outAD4 AD1 *A_RESTRICT - #define outAF2 AF1 *A_RESTRICT - #define outAF3 AF1 *A_RESTRICT - #define outAF4 AF1 *A_RESTRICT - #define outAL2 AL1 *A_RESTRICT - #define outAL3 AL1 *A_RESTRICT - #define outAL4 AL1 *A_RESTRICT - #define outAU2 AU1 *A_RESTRICT - #define outAU3 AU1 *A_RESTRICT - #define outAU4 AU1 *A_RESTRICT -//------------------------------------------------------------------------------------------------------------------------------ - #define varAD2(x) AD1 x[2] - #define varAD3(x) AD1 x[3] - #define varAD4(x) AD1 x[4] - #define varAF2(x) AF1 x[2] - #define varAF3(x) AF1 x[3] - #define varAF4(x) AF1 x[4] - #define varAL2(x) AL1 x[2] - #define varAL3(x) AL1 x[3] - #define varAL4(x) AL1 x[4] - #define varAU2(x) AU1 x[2] - #define varAU3(x) AU1 x[3] - #define varAU4(x) AU1 x[4] -//------------------------------------------------------------------------------------------------------------------------------ - #define initAD2(x,y) {x,y} - #define initAD3(x,y,z) {x,y,z} - #define initAD4(x,y,z,w) {x,y,z,w} - #define initAF2(x,y) {x,y} - #define initAF3(x,y,z) {x,y,z} - #define initAF4(x,y,z,w) {x,y,z,w} - #define initAL2(x,y) {x,y} - #define initAL3(x,y,z) {x,y,z} - #define initAL4(x,y,z,w) {x,y,z,w} - #define initAU2(x,y) {x,y} - #define initAU3(x,y,z) {x,y,z} - #define initAU4(x,y,z,w) {x,y,z,w} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Replace transcendentals with manual versions. -//============================================================================================================================== - #ifdef A_GCC - A_STATIC AD1 AAbsD1(AD1 a){return __builtin_fabs(a);} - A_STATIC AF1 AAbsF1(AF1 a){return __builtin_fabsf(a);} - A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(__builtin_abs(ASU1_(a)));} - A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(__builtin_llabs(ASL1_(a)));} - #else - A_STATIC AD1 AAbsD1(AD1 a){return fabs(a);} - A_STATIC AF1 AAbsF1(AF1 a){return fabsf(a);} - A_STATIC AU1 AAbsSU1(AU1 a){return AU1_(abs(ASU1_(a)));} - A_STATIC AL1 AAbsSL1(AL1 a){return AL1_(labs((long)ASL1_(a)));} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ACosD1(AD1 a){return __builtin_cos(a);} - A_STATIC AF1 ACosF1(AF1 a){return __builtin_cosf(a);} - #else - A_STATIC AD1 ACosD1(AD1 a){return cos(a);} - A_STATIC AF1 ACosF1(AF1 a){return cosf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ADotD2(inAD2 a,inAD2 b){return a[0]*b[0]+a[1]*b[1];} - A_STATIC AD1 ADotD3(inAD3 a,inAD3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} - A_STATIC AD1 ADotD4(inAD4 a,inAD4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} - A_STATIC AF1 
ADotF2(inAF2 a,inAF2 b){return a[0]*b[0]+a[1]*b[1];} - A_STATIC AF1 ADotF3(inAF3 a,inAF3 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2];} - A_STATIC AF1 ADotF4(inAF4 a,inAF4 b){return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]+a[3]*b[3];} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 AExp2D1(AD1 a){return __builtin_exp2(a);} - A_STATIC AF1 AExp2F1(AF1 a){return __builtin_exp2f(a);} - #else - A_STATIC AD1 AExp2D1(AD1 a){return exp2(a);} - A_STATIC AF1 AExp2F1(AF1 a){return exp2f(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 AFloorD1(AD1 a){return __builtin_floor(a);} - A_STATIC AF1 AFloorF1(AF1 a){return __builtin_floorf(a);} - #else - A_STATIC AD1 AFloorD1(AD1 a){return floor(a);} - A_STATIC AF1 AFloorF1(AF1 a){return floorf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ALerpD1(AD1 a,AD1 b,AD1 c){return b*c+(-a*c+a);} - A_STATIC AF1 ALerpF1(AF1 a,AF1 b,AF1 c){return b*c+(-a*c+a);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ALog2D1(AD1 a){return __builtin_log2(a);} - A_STATIC AF1 ALog2F1(AF1 a){return __builtin_log2f(a);} - #else - A_STATIC AD1 ALog2D1(AD1 a){return log2(a);} - A_STATIC AF1 ALog2F1(AF1 a){return log2f(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AMaxD1(AD1 a,AD1 b){return a>b?a:b;} - A_STATIC AF1 AMaxF1(AF1 a,AF1 b){return a>b?a:b;} - A_STATIC AL1 AMaxL1(AL1 a,AL1 b){return a>b?a:b;} - A_STATIC AU1 AMaxU1(AU1 a,AU1 b){return a>b?a:b;} 
-//------------------------------------------------------------------------------------------------------------------------------ - // These follow the convention that A integer types don't have signage, until they are operated on. - A_STATIC AL1 AMaxSL1(AL1 a,AL1 b){return (ASL1_(a)>ASL1_(b))?a:b;} - A_STATIC AU1 AMaxSU1(AU1 a,AU1 b){return (ASU1_(a)>ASU1_(b))?a:b;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AMinD1(AD1 a,AD1 b){return a>ASL1_(b));} - A_STATIC AU1 AShrSU1(AU1 a,AU1 b){return AU1_(ASU1_(a)>>ASU1_(b));} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ASinD1(AD1 a){return __builtin_sin(a);} - A_STATIC AF1 ASinF1(AF1 a){return __builtin_sinf(a);} - #else - A_STATIC AD1 ASinD1(AD1 a){return sin(a);} - A_STATIC AF1 ASinF1(AF1 a){return sinf(a);} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_GCC - A_STATIC AD1 ASqrtD1(AD1 a){return __builtin_sqrt(a);} - A_STATIC AF1 ASqrtF1(AF1 a){return __builtin_sqrtf(a);} - #else - A_STATIC AD1 ASqrtD1(AD1 a){return sqrt(a);} - A_STATIC AF1 ASqrtF1(AF1 a){return sqrtf(a);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS - DEPENDENT 
-//============================================================================================================================== - A_STATIC AD1 AClampD1(AD1 x,AD1 n,AD1 m){return AMaxD1(n,AMinD1(x,m));} - A_STATIC AF1 AClampF1(AF1 x,AF1 n,AF1 m){return AMaxF1(n,AMinF1(x,m));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 AFractD1(AD1 a){return a-AFloorD1(a);} - A_STATIC AF1 AFractF1(AF1 a){return a-AFloorF1(a);} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 APowD1(AD1 a,AD1 b){return AExp2D1(b*ALog2D1(a));} - A_STATIC AF1 APowF1(AF1 a,AF1 b){return AExp2F1(b*ALog2F1(a));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ARsqD1(AD1 a){return ARcpD1(ASqrtD1(a));} - A_STATIC AF1 ARsqF1(AF1 a){return ARcpF1(ASqrtF1(a));} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC AD1 ASatD1(AD1 a){return AMinD1(1.0,AMaxD1(0.0,a));} - A_STATIC AF1 ASatF1(AF1 a){return AMinF1(1.0f,AMaxF1(0.0f,a));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR OPS -//------------------------------------------------------------------------------------------------------------------------------ -// These are added as needed for production or 
prototyping, so not necessarily a complete set. -// They follow a convention of taking in a destination and also returning the destination value to increase utility. -//============================================================================================================================== - A_STATIC retAD2 opAAbsD2(outAD2 d,inAD2 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);return d;} - A_STATIC retAD3 opAAbsD3(outAD3 d,inAD3 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);return d;} - A_STATIC retAD4 opAAbsD4(outAD4 d,inAD4 a){d[0]=AAbsD1(a[0]);d[1]=AAbsD1(a[1]);d[2]=AAbsD1(a[2]);d[3]=AAbsD1(a[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAbsF2(outAF2 d,inAF2 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);return d;} - A_STATIC retAF3 opAAbsF3(outAF3 d,inAF3 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);return d;} - A_STATIC retAF4 opAAbsF4(outAF4 d,inAF4 a){d[0]=AAbsF1(a[0]);d[1]=AAbsF1(a[1]);d[2]=AAbsF1(a[2]);d[3]=AAbsF1(a[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} - A_STATIC retAD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} - A_STATIC retAD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];return d;} - A_STATIC retAF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];return d;} - A_STATIC retAF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]+b[0];d[1]=a[1]+b[1];d[2]=a[2]+b[2];d[3]=a[3]+b[3];return d;} 
-//============================================================================================================================== - A_STATIC retAD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} - A_STATIC retAD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} - A_STATIC retAD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;return d;} - A_STATIC retAF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;return d;} - A_STATIC retAF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d[0]=a[0]+b;d[1]=a[1]+b;d[2]=a[2]+b;d[3]=a[3]+b;return d;} -//============================================================================================================================== - A_STATIC retAD2 opACpyD2(outAD2 d,inAD2 a){d[0]=a[0];d[1]=a[1];return d;} - A_STATIC retAD3 opACpyD3(outAD3 d,inAD3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} - A_STATIC retAD4 opACpyD4(outAD4 d,inAD4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opACpyF2(outAF2 d,inAF2 a){d[0]=a[0];d[1]=a[1];return d;} - A_STATIC retAF3 opACpyF3(outAF3 d,inAF3 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];return d;} - A_STATIC retAF4 opACpyF4(outAF4 d,inAF4 a){d[0]=a[0];d[1]=a[1];d[2]=a[2];d[3]=a[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);return d;} - A_STATIC retAD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 
c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);return d;} - A_STATIC retAD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d[0]=ALerpD1(a[0],b[0],c[0]);d[1]=ALerpD1(a[1],b[1],c[1]);d[2]=ALerpD1(a[2],b[2],c[2]);d[3]=ALerpD1(a[3],b[3],c[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);return d;} - A_STATIC retAF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);return d;} - A_STATIC retAF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d[0]=ALerpF1(a[0],b[0],c[0]);d[1]=ALerpF1(a[1],b[1],c[1]);d[2]=ALerpF1(a[2],b[2],c[2]);d[3]=ALerpF1(a[3],b[3],c[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);return d;} - A_STATIC retAD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);return d;} - A_STATIC retAD4 opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d[0]=ALerpD1(a[0],b[0],c);d[1]=ALerpD1(a[1],b[1],c);d[2]=ALerpD1(a[2],b[2],c);d[3]=ALerpD1(a[3],b[3],c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);return d;} - A_STATIC retAF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);return d;} - A_STATIC retAF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 
c){d[0]=ALerpF1(a[0],b[0],c);d[1]=ALerpF1(a[1],b[1],c);d[2]=ALerpF1(a[2],b[2],c);d[3]=ALerpF1(a[3],b[3],c);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);return d;} - A_STATIC retAD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);return d;} - A_STATIC retAD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMaxD1(a[0],b[0]);d[1]=AMaxD1(a[1],b[1]);d[2]=AMaxD1(a[2],b[2]);d[3]=AMaxD1(a[3],b[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);return d;} - A_STATIC retAF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);return d;} - A_STATIC retAF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMaxF1(a[0],b[0]);d[1]=AMaxF1(a[1],b[1]);d[2]=AMaxF1(a[2],b[2]);d[3]=AMaxF1(a[3],b[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);return d;} - A_STATIC retAD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);return d;} - A_STATIC retAD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d[0]=AMinD1(a[0],b[0]);d[1]=AMinD1(a[1],b[1]);d[2]=AMinD1(a[2],b[2]);d[3]=AMinD1(a[3],b[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);return d;} - A_STATIC retAF3 opAMinF3(outAF3 d,inAF3 a,inAF3 
b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);return d;} - A_STATIC retAF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d[0]=AMinF1(a[0],b[0]);d[1]=AMinF1(a[1],b[1]);d[2]=AMinF1(a[2],b[2]);d[3]=AMinF1(a[3],b[3]);return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} - A_STATIC retAD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} - A_STATIC retAD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];return d;} - A_STATIC retAF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];return d;} - A_STATIC retAF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d[0]=a[0]*b[0];d[1]=a[1]*b[1];d[2]=a[2]*b[2];d[3]=a[3]*b[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} - A_STATIC retAD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} - A_STATIC retAD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;return d;} - A_STATIC retAF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;return d;} - A_STATIC retAF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 
b){d[0]=a[0]*b;d[1]=a[1]*b;d[2]=a[2]*b;d[3]=a[3]*b;return d;} -//============================================================================================================================== - A_STATIC retAD2 opANegD2(outAD2 d,inAD2 a){d[0]=-a[0];d[1]=-a[1];return d;} - A_STATIC retAD3 opANegD3(outAD3 d,inAD3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} - A_STATIC retAD4 opANegD4(outAD4 d,inAD4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opANegF2(outAF2 d,inAF2 a){d[0]=-a[0];d[1]=-a[1];return d;} - A_STATIC retAF3 opANegF3(outAF3 d,inAF3 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];return d;} - A_STATIC retAF4 opANegF4(outAF4 d,inAF4 a){d[0]=-a[0];d[1]=-a[1];d[2]=-a[2];d[3]=-a[3];return d;} -//============================================================================================================================== - A_STATIC retAD2 opARcpD2(outAD2 d,inAD2 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);return d;} - A_STATIC retAD3 opARcpD3(outAD3 d,inAD3 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);return d;} - A_STATIC retAD4 opARcpD4(outAD4 d,inAD4 a){d[0]=ARcpD1(a[0]);d[1]=ARcpD1(a[1]);d[2]=ARcpD1(a[2]);d[3]=ARcpD1(a[3]);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - A_STATIC retAF2 opARcpF2(outAF2 d,inAF2 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);return d;} - A_STATIC retAF3 opARcpF3(outAF3 d,inAF3 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);return d;} - A_STATIC retAF4 opARcpF4(outAF4 d,inAF4 a){d[0]=ARcpF1(a[0]);d[1]=ARcpF1(a[1]);d[2]=ARcpF1(a[2]);d[3]=ARcpF1(a[3]);return d;} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HALF FLOAT PACKING -//============================================================================================================================== - // Convert float to half (in lower 16-bits of output). - // Same fast technique as documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf - // Supports denormals. - // Conversion rules are to make computations possibly "safer" on the GPU, - // -INF & -NaN -> -65504 - // +INF & +NaN -> +65504 - A_STATIC AU1 AU1_AH1_AF1(AF1 f){ - static AW1 base[512]={ - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000, - 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0001,0x0002,0x0004,0x0008,0x0010,0x0020,0x0040,0x0080,0x0100, - 0x0200,0x0400,0x0800,0x0c00,0x1000,0x1400,0x1800,0x1c00,0x2000,0x2400,0x2800,0x2c00,0x3000,0x3400,0x3800,0x3c00, - 0x4000,0x4400,0x4800,0x4c00,0x5000,0x5400,0x5800,0x5c00,0x6000,0x6400,0x6800,0x6c00,0x7000,0x7400,0x7800,0x7bff, - 
0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff,0x7bff, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000, - 0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8000,0x8001,0x8002,0x8004,0x8008,0x8010,0x8020,0x8040,0x8080,0x8100, - 0x8200,0x8400,0x8800,0x8c00,0x9000,0x9400,0x9800,0x9c00,0xa000,0xa400,0xa800,0xac00,0xb000,0xb400,0xb800,0xbc00, - 0xc000,0xc400,0xc800,0xcc00,0xd000,0xd400,0xd800,0xdc00,0xe000,0xe400,0xe800,0xec00,0xf000,0xf400,0xf800,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 
0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff, - 0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff,0xfbff}; - static AB1 shift[512]={ - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, - 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, - 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x17,0x16,0x15,0x14,0x13,0x12,0x11,0x10,0x0f, - 0x0e,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d, - 0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x0d,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, - 0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18}; - union{AF1 f;AU1 u;}bits;bits.f=f;AU1 u=bits.u;AU1 i=u>>23;return (AU1)(base[i])+((u&0x7fffff)>>shift[i]);} -//------------------------------------------------------------------------------------------------------------------------------ - // Used to output packed constant. 
- A_STATIC AU1 AU1_AH2_AF2(inAF2 a){return AU1_AH1_AF1(a[0])+(AU1_AH1_AF1(a[1])<<16);} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GLSL -// -// -//============================================================================================================================== -#if defined(A_GLSL) && defined(A_GPU) - #ifndef A_SKIP_EXT - #ifdef A_HALF - #extension GL_EXT_shader_16bit_storage:require - #extension GL_EXT_shader_explicit_arithmetic_types:require - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_LONG - #extension GL_ARB_gpu_shader_int64:require - #extension GL_NV_shader_atomic_int64:require - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_WAVE - #extension GL_KHR_shader_subgroup_arithmetic:require - #extension GL_KHR_shader_subgroup_ballot:require - #extension 
GL_KHR_shader_subgroup_quad:require - #extension GL_KHR_shader_subgroup_shuffle:require - #endif - #endif -//============================================================================================================================== - #define AP1 bool - #define AP2 bvec2 - #define AP3 bvec3 - #define AP4 bvec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float - #define AF2 vec2 - #define AF3 vec3 - #define AF4 vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint - #define AU2 uvec2 - #define AU3 uvec3 - #define AU4 uvec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int - #define ASU2 ivec2 - #define ASU3 ivec3 - #define ASU4 ivec4 -//============================================================================================================================== - #define AF1_AU1(x) uintBitsToFloat(AU1(x)) - #define AF2_AU2(x) uintBitsToFloat(AU2(x)) - #define AF3_AU3(x) uintBitsToFloat(AU3(x)) - #define AF4_AU4(x) uintBitsToFloat(AU4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AF1(x) floatBitsToUint(AF1(x)) - #define AU2_AF2(x) floatBitsToUint(AF2(x)) - #define AU3_AF3(x) floatBitsToUint(AF3(x)) - #define AU4_AF4(x) floatBitsToUint(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH1_AF1_x(AF1 a){return packHalf2x16(AF2(a,0.0));} - #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AH2_AF2 packHalf2x16 - #define AU1_AW2Unorm_AF2 
packUnorm2x16 - #define AU1_AB4Unorm_AF4 packUnorm4x8 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF2_AH2_AU1 unpackHalf2x16 - #define AF2_AW2Unorm_AU1 unpackUnorm2x16 - #define AF4_AB4Unorm_AU1 unpackUnorm4x8 -//============================================================================================================================== - AF1 AF1_x(AF1 a){return AF1(a);} - AF2 AF2_x(AF1 a){return AF2(a,a);} - AF3 AF3_x(AF1 a){return AF3(a,a,a);} - AF4 AF4_x(AF1 a){return AF4(a,a,a,a);} - #define AF1_(a) AF1_x(AF1(a)) - #define AF2_(a) AF2_x(AF1(a)) - #define AF3_(a) AF3_x(AF1(a)) - #define AF4_(a) AF4_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_x(AU1 a){return AU1(a);} - AU2 AU2_x(AU1 a){return AU2(a,a);} - AU3 AU3_x(AU1 a){return AU3(a,a,a);} - AU4 AU4_x(AU1 a){return AU4(a,a,a,a);} - #define AU1_(a) AU1_x(AU1(a)) - #define AU2_(a) AU2_x(AU1(a)) - #define AU3_(a) AU3_x(AU1(a)) - #define AU4_(a) AU4_x(AU1(a)) -//============================================================================================================================== - AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} - AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} - AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));} - AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABfe(AU1 src,AU1 off,AU1 bits){return bitfieldExtract(src,ASU1(off),ASU1(bits));} - AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} - // Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<>ASU1(b));} - AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} - AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} - AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL BYTE -//============================================================================================================================== - #ifdef A_BYTE - #define AB1 uint8_t - #define AB2 u8vec2 - #define AB3 u8vec3 - #define AB4 u8vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASB1 int8_t - #define ASB2 i8vec2 - #define ASB3 i8vec3 - #define ASB4 i8vec4 -//------------------------------------------------------------------------------------------------------------------------------ - AB1 AB1_x(AB1 a){return AB1(a);} - AB2 AB2_x(AB1 a){return AB2(a,a);} - AB3 AB3_x(AB1 a){return AB3(a,a,a);} - AB4 AB4_x(AB1 a){return AB4(a,a,a,a);} - #define AB1_(a) AB1_x(AB1(a)) - #define AB2_(a) AB2_x(AB1(a)) - #define AB3_(a) AB3_x(AB1(a)) - #define AB4_(a) AB4_x(AB1(a)) - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL HALF 
-//============================================================================================================================== - #ifdef A_HALF - #define AH1 float16_t - #define AH2 f16vec2 - #define AH3 f16vec3 - #define AH4 f16vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 uint16_t - #define AW2 u16vec2 - #define AW3 u16vec3 - #define AW4 u16vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 int16_t - #define ASW2 i16vec2 - #define ASW3 i16vec3 - #define ASW4 i16vec4 -//============================================================================================================================== - #define AH2_AU1(x) unpackFloat2x16(AU1(x)) - AH4 AH4_AU2_x(AU2 x){return AH4(unpackFloat2x16(x.x),unpackFloat2x16(x.y));} - #define AH4_AU2(x) AH4_AU2_x(AU2(x)) - #define AW2_AU1(x) unpackUint2x16(AU1(x)) - #define AW4_AU2(x) unpackUint4x16(pack64(AU2(x))) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AH2(x) packFloat2x16(AH2(x)) - AU2 AU2_AH4_x(AH4 x){return AU2(packFloat2x16(x.xy),packFloat2x16(x.zw));} - #define AU2_AH4(x) AU2_AH4_x(AH4(x)) - #define AU1_AW2(x) packUint2x16(AW2(x)) - #define AU2_AW4(x) unpack32(packUint4x16(AW4(x))) -//============================================================================================================================== - #define AW1_AH1(x) halfBitsToUint16(AH1(x)) - #define AW2_AH2(x) halfBitsToUint16(AH2(x)) - #define AW3_AH3(x) halfBitsToUint16(AH3(x)) - #define AW4_AH4(x) halfBitsToUint16(AH4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AH1_AW1(x) uint16BitsToHalf(AW1(x)) - #define AH2_AW2(x) uint16BitsToHalf(AW2(x)) - #define 
AH3_AW3(x) uint16BitsToHalf(AW3(x)) - #define AH4_AW4(x) uint16BitsToHalf(AW4(x)) -//============================================================================================================================== - AH1 AH1_x(AH1 a){return AH1(a);} - AH2 AH2_x(AH1 a){return AH2(a,a);} - AH3 AH3_x(AH1 a){return AH3(a,a,a);} - AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} - #define AH1_(a) AH1_x(AH1(a)) - #define AH2_(a) AH2_x(AH1(a)) - #define AH3_(a) AH3_x(AH1(a)) - #define AH4_(a) AH4_x(AH1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AW1_x(AW1 a){return AW1(a);} - AW2 AW2_x(AW1 a){return AW2(a,a);} - AW3 AW3_x(AW1 a){return AW3(a,a,a);} - AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} - #define AW1_(a) AW1_x(AW1(a)) - #define AW2_(a) AW2_x(AW1(a)) - #define AW3_(a) AW3_x(AW1(a)) - #define AW4_(a) AW4_x(AW1(a)) -//============================================================================================================================== - AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));} - AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));} - AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));} - AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AClampH1(AH1 x,AH1 n,AH1 m){return clamp(x,n,m);} - AH2 AClampH2(AH2 x,AH2 n,AH2 m){return clamp(x,n,m);} - AH3 AClampH3(AH3 x,AH3 n,AH3 m){return clamp(x,n,m);} - AH4 AClampH4(AH4 x,AH4 n,AH4 m){return clamp(x,n,m);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFractH1(AH1 x){return fract(x);} - AH2 AFractH2(AH2 x){return fract(x);} - AH3 AFractH3(AH3 x){return fract(x);} - AH4 AFractH4(AH4 x){return fract(x);} -//------------------------------------------------------------------------------------------------------------------------------ - 
AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return mix(x,y,a);} - AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return mix(x,y,a);} - AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return mix(x,y,a);} - AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return mix(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - // No packed version of max3. - AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));} - AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));} - AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));} - AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASU1(a),ASU1(b)));} - AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASU2(a),ASU2(b)));} - AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASU3(a),ASU3(b)));} - AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - // No packed version of min3. 
- AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));} - AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));} - AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));} - AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASU1(a),ASU1(b)));} - AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASU2(a),ASU2(b)));} - AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASU3(a),ASU3(b)));} - AW4 AMinSW4(AW4 a,AW4 b){return AW4(min(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARcpH1(AH1 x){return AH1_(1.0)/x;} - AH2 ARcpH2(AH2 x){return AH2_(1.0)/x;} - AH3 ARcpH3(AH3 x){return AH3_(1.0)/x;} - AH4 ARcpH4(AH4 x){return AH4_(1.0)/x;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARsqH1(AH1 x){return AH1_(1.0)/sqrt(x);} - AH2 ARsqH2(AH2 x){return AH2_(1.0)/sqrt(x);} - AH3 ARsqH3(AH3 x){return AH3_(1.0)/sqrt(x);} - AH4 ARsqH4(AH4 x){return AH4_(1.0)/sqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASatH1(AH1 x){return clamp(x,AH1_(0.0),AH1_(1.0));} - AH2 ASatH2(AH2 x){return clamp(x,AH2_(0.0),AH2_(1.0));} - AH3 ASatH3(AH3 x){return clamp(x,AH3_(0.0),AH3_(1.0));} - AH4 ASatH4(AH4 x){return clamp(x,AH4_(0.0),AH4_(1.0));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} - AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} - AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} - AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} - #endif 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL DOUBLE -//============================================================================================================================== - #ifdef A_DUBL - #define AD1 double - #define AD2 dvec2 - #define AD3 dvec3 - #define AD4 dvec4 -//------------------------------------------------------------------------------------------------------------------------------ - AD1 AD1_x(AD1 a){return AD1(a);} - AD2 AD2_x(AD1 a){return AD2(a,a);} - AD3 AD3_x(AD1 a){return AD3(a,a,a);} - AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} - #define AD1_(a) AD1_x(AD1(a)) - #define AD2_(a) AD2_x(AD1(a)) - #define AD3_(a) AD3_x(AD1(a)) - #define AD4_(a) AD4_x(AD1(a)) -//============================================================================================================================== - AD1 AFractD1(AD1 x){return fract(x);} - AD2 AFractD2(AD2 x){return fract(x);} - AD3 AFractD3(AD3 x){return fract(x);} - AD4 AFractD4(AD4 x){return fract(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return mix(x,y,a);} - AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return mix(x,y,a);} - AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return mix(x,y,a);} - AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return mix(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARcpD1(AD1 x){return AD1_(1.0)/x;} - AD2 ARcpD2(AD2 
x){return AD2_(1.0)/x;} - AD3 ARcpD3(AD3 x){return AD3_(1.0)/x;} - AD4 ARcpD4(AD4 x){return AD4_(1.0)/x;} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARsqD1(AD1 x){return AD1_(1.0)/sqrt(x);} - AD2 ARsqD2(AD2 x){return AD2_(1.0)/sqrt(x);} - AD3 ARsqD3(AD3 x){return AD3_(1.0)/sqrt(x);} - AD4 ARsqD4(AD4 x){return AD4_(1.0)/sqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ASatD1(AD1 x){return clamp(x,AD1_(0.0),AD1_(1.0));} - AD2 ASatD2(AD2 x){return clamp(x,AD2_(0.0),AD2_(1.0));} - AD3 ASatD3(AD3 x){return clamp(x,AD3_(0.0),AD3_(1.0));} - AD4 ASatD4(AD4 x){return clamp(x,AD4_(0.0),AD4_(1.0));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// GLSL LONG -//============================================================================================================================== - #ifdef A_LONG - #define AL1 uint64_t - #define AL2 u64vec2 - #define AL3 u64vec3 - #define AL4 u64vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASL1 int64_t - #define ASL2 i64vec2 - #define ASL3 i64vec3 - #define ASL4 i64vec4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AL1_AU2(x) packUint2x32(AU2(x)) - #define AU2_AL1(x) 
unpackUint2x32(AL1(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AL1_x(AL1 a){return AL1(a);} - AL2 AL2_x(AL1 a){return AL2(a,a);} - AL3 AL3_x(AL1 a){return AL3(a,a,a);} - AL4 AL4_x(AL1 a){return AL4(a,a,a,a);} - #define AL1_(a) AL1_x(AL1(a)) - #define AL2_(a) AL2_x(AL1(a)) - #define AL3_(a) AL3_x(AL1(a)) - #define AL4_(a) AL4_x(AL1(a)) -//============================================================================================================================== - AL1 AAbsSL1(AL1 a){return AL1(abs(ASL1(a)));} - AL2 AAbsSL2(AL2 a){return AL2(abs(ASL2(a)));} - AL3 AAbsSL3(AL3 a){return AL3(abs(ASL3(a)));} - AL4 AAbsSL4(AL4 a){return AL4(abs(ASL4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AMaxSL1(AL1 a,AL1 b){return AL1(max(ASU1(a),ASU1(b)));} - AL2 AMaxSL2(AL2 a,AL2 b){return AL2(max(ASU2(a),ASU2(b)));} - AL3 AMaxSL3(AL3 a,AL3 b){return AL3(max(ASU3(a),ASU3(b)));} - AL4 AMaxSL4(AL4 a,AL4 b){return AL4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AL1 AMinSL1(AL1 a,AL1 b){return AL1(min(ASU1(a),ASU1(b)));} - AL2 AMinSL2(AL2 a,AL2 b){return AL2(min(ASU2(a),ASU2(b)));} - AL3 AMinSL3(AL3 a,AL3 b){return AL3(min(ASU3(a),ASU3(b)));} - AL4 AMinSL4(AL4 a,AL4 b){return AL4(min(ASU4(a),ASU4(b)));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ 
-//============================================================================================================================== -// WAVE OPERATIONS -//============================================================================================================================== - #ifdef A_WAVE - // Where 'x' must be a compile time literal. - AF1 AWaveXorF1(AF1 v,AU1 x){return subgroupShuffleXor(v,x);} - AF2 AWaveXorF2(AF2 v,AU1 x){return subgroupShuffleXor(v,x);} - AF3 AWaveXorF3(AF3 v,AU1 x){return subgroupShuffleXor(v,x);} - AF4 AWaveXorF4(AF4 v,AU1 x){return subgroupShuffleXor(v,x);} - AU1 AWaveXorU1(AU1 v,AU1 x){return subgroupShuffleXor(v,x);} - AU2 AWaveXorU2(AU2 v,AU1 x){return subgroupShuffleXor(v,x);} - AU3 AWaveXorU3(AU3 v,AU1 x){return subgroupShuffleXor(v,x);} - AU4 AWaveXorU4(AU4 v,AU1 x){return subgroupShuffleXor(v,x);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(subgroupShuffleXor(AU1_AH2(v),x));} - AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(subgroupShuffleXor(AU2_AH4(v),x));} - AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(subgroupShuffleXor(AU1_AW2(v),x));} - AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU2(subgroupShuffleXor(AU2_AW4(v),x));} - #endif - #endif -//============================================================================================================================== -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// HLSL -// -// -//============================================================================================================================== -#if defined(A_HLSL) && defined(A_GPU) - #ifdef A_HLSL_6_2 - #define AP1 bool - #define AP2 bool2 - #define AP3 bool3 - #define AP4 bool4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float32_t - #define AF2 float32_t2 - #define AF3 float32_t3 - #define AF4 float32_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint32_t - #define AU2 uint32_t2 - #define AU3 uint32_t3 - #define AU4 uint32_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int32_t - #define ASU2 int32_t2 - #define ASU3 int32_t3 - #define ASU4 int32_t4 - #else - #define AP1 bool - #define AP2 bool2 - #define AP3 bool3 - #define AP4 bool4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AF1 float - #define AF2 float2 - #define AF3 float3 - #define AF4 float4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1 uint - #define AU2 uint2 - #define AU3 uint3 - #define AU4 
uint4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASU1 int - #define ASU2 int2 - #define ASU3 int3 - #define ASU4 int4 - #endif -//============================================================================================================================== - #define AF1_AU1(x) asfloat(AU1(x)) - #define AF2_AU2(x) asfloat(AU2(x)) - #define AF3_AU3(x) asfloat(AU3(x)) - #define AF4_AU4(x) asfloat(AU4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AU1_AF1(x) asuint(AF1(x)) - #define AU2_AF2(x) asuint(AF2(x)) - #define AU3_AF3(x) asuint(AF3(x)) - #define AU4_AF4(x) asuint(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH1_AF1_x(AF1 a){return f32tof16(a);} - #define AU1_AH1_AF1(a) AU1_AH1_AF1_x(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH2_AF2_x(AF2 a){return f32tof16(a.x)|(f32tof16(a.y)<<16);} - #define AU1_AH2_AF2(a) AU1_AH2_AF2_x(AF2(a)) - #define AU1_AB4Unorm_AF4(x) D3DCOLORtoUBYTE4(AF4(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AF2 AF2_AH2_AU1_x(AU1 x){return AF2(f16tof32(x&0xFFFF),f16tof32(x>>16));} - #define AF2_AH2_AU1(x) AF2_AH2_AU1_x(AU1(x)) -//============================================================================================================================== - AF1 AF1_x(AF1 a){return AF1(a);} - AF2 AF2_x(AF1 a){return AF2(a,a);} - AF3 AF3_x(AF1 a){return AF3(a,a,a);} - AF4 AF4_x(AF1 a){return AF4(a,a,a,a);} - #define AF1_(a) AF1_x(AF1(a)) - #define AF2_(a) AF2_x(AF1(a)) - #define AF3_(a) AF3_x(AF1(a)) - #define AF4_(a) AF4_x(AF1(a)) 
-//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_x(AU1 a){return AU1(a);} - AU2 AU2_x(AU1 a){return AU2(a,a);} - AU3 AU3_x(AU1 a){return AU3(a,a,a);} - AU4 AU4_x(AU1 a){return AU4(a,a,a,a);} - #define AU1_(a) AU1_x(AU1(a)) - #define AU2_(a) AU2_x(AU1(a)) - #define AU3_(a) AU3_x(AU1(a)) - #define AU4_(a) AU4_x(AU1(a)) -//============================================================================================================================== - AU1 AAbsSU1(AU1 a){return AU1(abs(ASU1(a)));} - AU2 AAbsSU2(AU2 a){return AU2(abs(ASU2(a)));} - AU3 AAbsSU3(AU3 a){return AU3(abs(ASU3(a)));} - AU4 AAbsSU4(AU4 a){return AU4(abs(ASU4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABfe(AU1 src,AU1 off,AU1 bits){AU1 mask=(1u<>off)&mask;} - AU1 ABfi(AU1 src,AU1 ins,AU1 mask){return (ins&mask)|(src&(~mask));} - AU1 ABfiM(AU1 src,AU1 ins,AU1 bits){AU1 mask=(1u<>ASU1(b));} - AU2 AShrSU2(AU2 a,AU2 b){return AU2(ASU2(a)>>ASU2(b));} - AU3 AShrSU3(AU3 a,AU3 b){return AU3(ASU3(a)>>ASU3(b));} - AU4 AShrSU4(AU4 a,AU4 b){return AU4(ASU4(a)>>ASU4(b));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL BYTE -//============================================================================================================================== - #ifdef A_BYTE - #endif 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL HALF -//============================================================================================================================== - #ifdef A_HALF - #ifdef A_HLSL_6_2 - #define AH1 float16_t - #define AH2 float16_t2 - #define AH3 float16_t3 - #define AH4 float16_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 uint16_t - #define AW2 uint16_t2 - #define AW3 uint16_t3 - #define AW4 uint16_t4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 int16_t - #define ASW2 int16_t2 - #define ASW3 int16_t3 - #define ASW4 int16_t4 - #else - #define AH1 min16float - #define AH2 min16float2 - #define AH3 min16float3 - #define AH4 min16float4 -//------------------------------------------------------------------------------------------------------------------------------ - #define AW1 min16uint - #define AW2 min16uint2 - #define AW3 min16uint3 - #define AW4 min16uint4 -//------------------------------------------------------------------------------------------------------------------------------ - #define ASW1 min16int - #define ASW2 min16int2 - #define ASW3 min16int3 - #define ASW4 min16int4 - #endif -//============================================================================================================================== - // Need to use manual unpack to get 
optimal execution (don't use packed types in buffers directly). - // Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/ - AH2 AH2_AU1_x(AU1 x){AF2 t=f16tof32(AU2(x&0xFFFF,x>>16));return AH2(t);} - AH4 AH4_AU2_x(AU2 x){return AH4(AH2_AU1_x(x.x),AH2_AU1_x(x.y));} - AW2 AW2_AU1_x(AU1 x){AU2 t=AU2(x&0xFFFF,x>>16);return AW2(t);} - AW4 AW4_AU2_x(AU2 x){return AW4(AW2_AU1_x(x.x),AW2_AU1_x(x.y));} - #define AH2_AU1(x) AH2_AU1_x(AU1(x)) - #define AH4_AU2(x) AH4_AU2_x(AU2(x)) - #define AW2_AU1(x) AW2_AU1_x(AU1(x)) - #define AW4_AU2(x) AW4_AU2_x(AU2(x)) -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AU1_AH2_x(AH2 x){return f32tof16(x.x)+(f32tof16(x.y)<<16);} - AU2 AU2_AH4_x(AH4 x){return AU2(AU1_AH2_x(x.xy),AU1_AH2_x(x.zw));} - AU1 AU1_AW2_x(AW2 x){return AU1(x.x)+(AU1(x.y)<<16);} - AU2 AU2_AW4_x(AW4 x){return AU2(AU1_AW2_x(x.xy),AU1_AW2_x(x.zw));} - #define AU1_AH2(x) AU1_AH2_x(AH2(x)) - #define AU2_AH4(x) AU2_AH4_x(AH4(x)) - #define AU1_AW2(x) AU1_AW2_x(AW2(x)) - #define AU2_AW4(x) AU2_AW4_x(AW4(x)) -//============================================================================================================================== - #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST) - #define AW1_AH1(x) asuint16(x) - #define AW2_AH2(x) asuint16(x) - #define AW3_AH3(x) asuint16(x) - #define AW4_AH4(x) asuint16(x) - #else - #define AW1_AH1(a) AW1(f32tof16(AF1(a))) - #define AW2_AH2(a) AW2(AW1_AH1((a).x),AW1_AH1((a).y)) - #define AW3_AH3(a) AW3(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z)) - #define AW4_AH4(a) AW4(AW1_AH1((a).x),AW1_AH1((a).y),AW1_AH1((a).z),AW1_AH1((a).w)) - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #if defined(A_HLSL_6_2) && !defined(A_NO_16_BIT_CAST) - #define AH1_AW1(x) asfloat16(x) - #define AH2_AW2(x) asfloat16(x) - #define AH3_AW3(x) 
asfloat16(x) - #define AH4_AW4(x) asfloat16(x) - #else - #define AH1_AW1(a) AH1(f16tof32(AU1(a))) - #define AH2_AW2(a) AH2(AH1_AW1((a).x),AH1_AW1((a).y)) - #define AH3_AW3(a) AH3(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z)) - #define AH4_AW4(a) AH4(AH1_AW1((a).x),AH1_AW1((a).y),AH1_AW1((a).z),AH1_AW1((a).w)) - #endif -//============================================================================================================================== - AH1 AH1_x(AH1 a){return AH1(a);} - AH2 AH2_x(AH1 a){return AH2(a,a);} - AH3 AH3_x(AH1 a){return AH3(a,a,a);} - AH4 AH4_x(AH1 a){return AH4(a,a,a,a);} - #define AH1_(a) AH1_x(AH1(a)) - #define AH2_(a) AH2_x(AH1(a)) - #define AH3_(a) AH3_x(AH1(a)) - #define AH4_(a) AH4_x(AH1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AW1_x(AW1 a){return AW1(a);} - AW2 AW2_x(AW1 a){return AW2(a,a);} - AW3 AW3_x(AW1 a){return AW3(a,a,a);} - AW4 AW4_x(AW1 a){return AW4(a,a,a,a);} - #define AW1_(a) AW1_x(AW1(a)) - #define AW2_(a) AW2_x(AW1(a)) - #define AW3_(a) AW3_x(AW1(a)) - #define AW4_(a) AW4_x(AW1(a)) -//============================================================================================================================== - AW1 AAbsSW1(AW1 a){return AW1(abs(ASW1(a)));} - AW2 AAbsSW2(AW2 a){return AW2(abs(ASW2(a)));} - AW3 AAbsSW3(AW3 a){return AW3(abs(ASW3(a)));} - AW4 AAbsSW4(AW4 a){return AW4(abs(ASW4(a)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AClampH1(AH1 x,AH1 n,AH1 m){return max(n,min(x,m));} - AH2 AClampH2(AH2 x,AH2 n,AH2 m){return max(n,min(x,m));} - AH3 AClampH3(AH3 x,AH3 n,AH3 m){return max(n,min(x,m));} - AH4 AClampH4(AH4 x,AH4 n,AH4 m){return max(n,min(x,m));} -//------------------------------------------------------------------------------------------------------------------------------ - // V_FRACT_F16 (note DX frac() is 
different). - AH1 AFractH1(AH1 x){return x-floor(x);} - AH2 AFractH2(AH2 x){return x-floor(x);} - AH3 AFractH3(AH3 x){return x-floor(x);} - AH4 AFractH4(AH4 x){return x-floor(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ALerpH1(AH1 x,AH1 y,AH1 a){return lerp(x,y,a);} - AH2 ALerpH2(AH2 x,AH2 y,AH2 a){return lerp(x,y,a);} - AH3 ALerpH3(AH3 x,AH3 y,AH3 a){return lerp(x,y,a);} - AH4 ALerpH4(AH4 x,AH4 y,AH4 a){return lerp(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AMax3H1(AH1 x,AH1 y,AH1 z){return max(x,max(y,z));} - AH2 AMax3H2(AH2 x,AH2 y,AH2 z){return max(x,max(y,z));} - AH3 AMax3H3(AH3 x,AH3 y,AH3 z){return max(x,max(y,z));} - AH4 AMax3H4(AH4 x,AH4 y,AH4 z){return max(x,max(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMaxSW1(AW1 a,AW1 b){return AW1(max(ASU1(a),ASU1(b)));} - AW2 AMaxSW2(AW2 a,AW2 b){return AW2(max(ASU2(a),ASU2(b)));} - AW3 AMaxSW3(AW3 a,AW3 b){return AW3(max(ASU3(a),ASU3(b)));} - AW4 AMaxSW4(AW4 a,AW4 b){return AW4(max(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AMin3H1(AH1 x,AH1 y,AH1 z){return min(x,min(y,z));} - AH2 AMin3H2(AH2 x,AH2 y,AH2 z){return min(x,min(y,z));} - AH3 AMin3H3(AH3 x,AH3 y,AH3 z){return min(x,min(y,z));} - AH4 AMin3H4(AH4 x,AH4 y,AH4 z){return min(x,min(y,z));} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AMinSW1(AW1 a,AW1 b){return AW1(min(ASU1(a),ASU1(b)));} - AW2 AMinSW2(AW2 a,AW2 b){return AW2(min(ASU2(a),ASU2(b)));} - AW3 AMinSW3(AW3 a,AW3 b){return AW3(min(ASU3(a),ASU3(b)));} - AW4 AMinSW4(AW4 a,AW4 b){return 
AW4(min(ASU4(a),ASU4(b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARcpH1(AH1 x){return rcp(x);} - AH2 ARcpH2(AH2 x){return rcp(x);} - AH3 ARcpH3(AH3 x){return rcp(x);} - AH4 ARcpH4(AH4 x){return rcp(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ARsqH1(AH1 x){return rsqrt(x);} - AH2 ARsqH2(AH2 x){return rsqrt(x);} - AH3 ARsqH3(AH3 x){return rsqrt(x);} - AH4 ARsqH4(AH4 x){return rsqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASatH1(AH1 x){return saturate(x);} - AH2 ASatH2(AH2 x){return saturate(x);} - AH3 ASatH3(AH3 x){return saturate(x);} - AH4 ASatH4(AH4 x){return saturate(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AShrSW1(AW1 a,AW1 b){return AW1(ASW1(a)>>ASW1(b));} - AW2 AShrSW2(AW2 a,AW2 b){return AW2(ASW2(a)>>ASW2(b));} - AW3 AShrSW3(AW3 a,AW3 b){return AW3(ASW3(a)>>ASW3(b));} - AW4 AShrSW4(AW4 a,AW4 b){return AW4(ASW4(a)>>ASW4(b));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HLSL DOUBLE -//============================================================================================================================== - #ifdef A_DUBL - #ifdef A_HLSL_6_2 - #define AD1 float64_t - #define AD2 float64_t2 
- #define AD3 float64_t3 - #define AD4 float64_t4 - #else - #define AD1 double - #define AD2 double2 - #define AD3 double3 - #define AD4 double4 - #endif -//------------------------------------------------------------------------------------------------------------------------------ - AD1 AD1_x(AD1 a){return AD1(a);} - AD2 AD2_x(AD1 a){return AD2(a,a);} - AD3 AD3_x(AD1 a){return AD3(a,a,a);} - AD4 AD4_x(AD1 a){return AD4(a,a,a,a);} - #define AD1_(a) AD1_x(AD1(a)) - #define AD2_(a) AD2_x(AD1(a)) - #define AD3_(a) AD3_x(AD1(a)) - #define AD4_(a) AD4_x(AD1(a)) -//============================================================================================================================== - AD1 AFractD1(AD1 a){return a-floor(a);} - AD2 AFractD2(AD2 a){return a-floor(a);} - AD3 AFractD3(AD3 a){return a-floor(a);} - AD4 AFractD4(AD4 a){return a-floor(a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ALerpD1(AD1 x,AD1 y,AD1 a){return lerp(x,y,a);} - AD2 ALerpD2(AD2 x,AD2 y,AD2 a){return lerp(x,y,a);} - AD3 ALerpD3(AD3 x,AD3 y,AD3 a){return lerp(x,y,a);} - AD4 ALerpD4(AD4 x,AD4 y,AD4 a){return lerp(x,y,a);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARcpD1(AD1 x){return rcp(x);} - AD2 ARcpD2(AD2 x){return rcp(x);} - AD3 ARcpD3(AD3 x){return rcp(x);} - AD4 ARcpD4(AD4 x){return rcp(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ARsqD1(AD1 x){return rsqrt(x);} - AD2 ARsqD2(AD2 x){return rsqrt(x);} - AD3 ARsqD3(AD3 x){return rsqrt(x);} - AD4 ARsqD4(AD4 x){return rsqrt(x);} -//------------------------------------------------------------------------------------------------------------------------------ - AD1 ASatD1(AD1 x){return saturate(x);} - AD2 ASatD2(AD2 x){return saturate(x);} - AD3 ASatD3(AD3 
x){return saturate(x);} - AD4 ASatD4(AD4 x){return saturate(x);} - #endif -//============================================================================================================================== -// HLSL WAVE -//============================================================================================================================== - #ifdef A_WAVE - // Where 'x' must be a compile time literal. - AF1 AWaveXorF1(AF1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AF2 AWaveXorF2(AF2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AF3 AWaveXorF3(AF3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AF4 AWaveXorF4(AF4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU1 AWaveXorU1(AU1 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU2 AWaveXorU1(AU2 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU3 AWaveXorU1(AU3 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} - AU4 AWaveXorU1(AU4 v,AU1 x){return WaveReadLaneAt(v,WaveGetLaneIndex()^x);} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - AH2 AWaveXorH2(AH2 v,AU1 x){return AH2_AU1(WaveReadLaneAt(AU1_AH2(v),WaveGetLaneIndex()^x));} - AH4 AWaveXorH4(AH4 v,AU1 x){return AH4_AU2(WaveReadLaneAt(AU2_AH4(v),WaveGetLaneIndex()^x));} - AW2 AWaveXorW2(AW2 v,AU1 x){return AW2_AU1(WaveReadLaneAt(AU1_AW2(v),WaveGetLaneIndex()^x));} - AW4 AWaveXorW4(AW4 v,AU1 x){return AW4_AU1(WaveReadLaneAt(AU1_AW4(v),WaveGetLaneIndex()^x));} - #endif - #endif -//============================================================================================================================== -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GPU COMMON -// -// -//============================================================================================================================== -#ifdef A_GPU - // Negative and positive infinity. - #define A_INFP_F AF1_AU1(0x7f800000u) - #define A_INFN_F AF1_AU1(0xff800000u) -//------------------------------------------------------------------------------------------------------------------------------ - // Copy sign from 's' to positive 'd'. 
- AF1 ACpySgnF1(AF1 d,AF1 s){return AF1_AU1(AU1_AF1(d)|(AU1_AF1(s)&AU1_(0x80000000u)));} - AF2 ACpySgnF2(AF2 d,AF2 s){return AF2_AU2(AU2_AF2(d)|(AU2_AF2(s)&AU2_(0x80000000u)));} - AF3 ACpySgnF3(AF3 d,AF3 s){return AF3_AU3(AU3_AF3(d)|(AU3_AF3(s)&AU3_(0x80000000u)));} - AF4 ACpySgnF4(AF4 d,AF4 s){return AF4_AU4(AU4_AF4(d)|(AU4_AF4(s)&AU4_(0x80000000u)));} -//------------------------------------------------------------------------------------------------------------------------------ - // Single operation to return (useful to create a mask to use in lerp for branch free logic), - // m=NaN := 0 - // m>=0 := 0 - // m<0 := 1 - // Uses the following useful floating point logic, - // saturate(+a*(-INF)==-INF) := 0 - // saturate( 0*(-INF)== NaN) := 0 - // saturate(-a*(-INF)==+INF) := 1 - AF1 ASignedF1(AF1 m){return ASatF1(m*AF1_(A_INFN_F));} - AF2 ASignedF2(AF2 m){return ASatF2(m*AF2_(A_INFN_F));} - AF3 ASignedF3(AF3 m){return ASatF3(m*AF3_(A_INFN_F));} - AF4 ASignedF4(AF4 m){return ASatF4(m*AF4_(A_INFN_F));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AGtZeroF1(AF1 m){return ASatF1(m*AF1_(A_INFP_F));} - AF2 AGtZeroF2(AF2 m){return ASatF2(m*AF2_(A_INFP_F));} - AF3 AGtZeroF3(AF3 m){return ASatF3(m*AF3_(A_INFP_F));} - AF4 AGtZeroF4(AF4 m){return ASatF4(m*AF4_(A_INFP_F));} -//============================================================================================================================== - #ifdef A_HALF - #ifdef A_HLSL_6_2 - #define A_INFP_H AH1_AW1((uint16_t)0x7c00u) - #define A_INFN_H AH1_AW1((uint16_t)0xfc00u) - #else - #define A_INFP_H AH1_AW1(0x7c00u) - #define A_INFN_H AH1_AW1(0xfc00u) - #endif - -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ACpySgnH1(AH1 d,AH1 s){return AH1_AW1(AW1_AH1(d)|(AW1_AH1(s)&AW1_(0x8000u)));} - AH2 ACpySgnH2(AH2 d,AH2 s){return 
AH2_AW2(AW2_AH2(d)|(AW2_AH2(s)&AW2_(0x8000u)));} - AH3 ACpySgnH3(AH3 d,AH3 s){return AH3_AW3(AW3_AH3(d)|(AW3_AH3(s)&AW3_(0x8000u)));} - AH4 ACpySgnH4(AH4 d,AH4 s){return AH4_AW4(AW4_AH4(d)|(AW4_AH4(s)&AW4_(0x8000u)));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASignedH1(AH1 m){return ASatH1(m*AH1_(A_INFN_H));} - AH2 ASignedH2(AH2 m){return ASatH2(m*AH2_(A_INFN_H));} - AH3 ASignedH3(AH3 m){return ASatH3(m*AH3_(A_INFN_H));} - AH4 ASignedH4(AH4 m){return ASatH4(m*AH4_(A_INFN_H));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AGtZeroH1(AH1 m){return ASatH1(m*AH1_(A_INFP_H));} - AH2 AGtZeroH2(AH2 m){return ASatH2(m*AH2_(A_INFP_H));} - AH3 AGtZeroH3(AH3 m){return ASatH3(m*AH3_(A_INFP_H));} - AH4 AGtZeroH4(AH4 m){return ASatH4(m*AH4_(A_INFP_H));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [FIS] FLOAT INTEGER SORTABLE -//------------------------------------------------------------------------------------------------------------------------------ -// Float to integer sortable. -// - If sign bit=0, flip the sign bit (positives). -// - If sign bit=1, flip all bits (negatives). -// Integer sortable to float. -// - If sign bit=1, flip the sign bit (positives). -// - If sign bit=0, flip all bits (negatives). -// Has nice side effects. -// - Larger integers are more positive values. 
-// - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage). -// Burns 3 ops for conversion {shift,or,xor}. -//============================================================================================================================== - AU1 AFisToU1(AU1 x){return x^(( AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} - AU1 AFisFromU1(AU1 x){return x^((~AShrSU1(x,AU1_(31)))|AU1_(0x80000000));} -//------------------------------------------------------------------------------------------------------------------------------ - // Just adjust high 16-bit value (useful when upper part of 32-bit word is a 16-bit float value). - AU1 AFisToHiU1(AU1 x){return x^(( AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} - AU1 AFisFromHiU1(AU1 x){return x^((~AShrSU1(x,AU1_(15)))|AU1_(0x80000000));} -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - AW1 AFisToW1(AW1 x){return x^(( AShrSW1(x,AW1_(15)))|AW1_(0x8000));} - AW1 AFisFromW1(AW1 x){return x^((~AShrSW1(x,AW1_(15)))|AW1_(0x8000));} -//------------------------------------------------------------------------------------------------------------------------------ - AW2 AFisToW2(AW2 x){return x^(( AShrSW2(x,AW2_(15)))|AW2_(0x8000));} - AW2 AFisFromW2(AW2 x){return x^((~AShrSW2(x,AW2_(15)))|AW2_(0x8000));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [PERM] V_PERM_B32 
-//------------------------------------------------------------------------------------------------------------------------------ -// Support for V_PERM_B32 started in the 3rd generation of GCN. -//------------------------------------------------------------------------------------------------------------------------------ -// yyyyxxxx - The 'i' input. -// 76543210 -// ======== -// HGFEDCBA - Naming on permutation. -//------------------------------------------------------------------------------------------------------------------------------ -// TODO -// ==== -// - Make sure compiler optimizes this. -//============================================================================================================================== - #ifdef A_HALF - AU1 APerm0E0A(AU2 i){return((i.x )&0xffu)|((i.y<<16)&0xff0000u);} - AU1 APerm0F0B(AU2 i){return((i.x>> 8)&0xffu)|((i.y<< 8)&0xff0000u);} - AU1 APerm0G0C(AU2 i){return((i.x>>16)&0xffu)|((i.y )&0xff0000u);} - AU1 APerm0H0D(AU2 i){return((i.x>>24)&0xffu)|((i.y>> 8)&0xff0000u);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 APermHGFA(AU2 i){return((i.x )&0x000000ffu)|(i.y&0xffffff00u);} - AU1 APermHGFC(AU2 i){return((i.x>>16)&0x000000ffu)|(i.y&0xffffff00u);} - AU1 APermHGAE(AU2 i){return((i.x<< 8)&0x0000ff00u)|(i.y&0xffff00ffu);} - AU1 APermHGCE(AU2 i){return((i.x>> 8)&0x0000ff00u)|(i.y&0xffff00ffu);} - AU1 APermHAFE(AU2 i){return((i.x<<16)&0x00ff0000u)|(i.y&0xff00ffffu);} - AU1 APermHCFE(AU2 i){return((i.x )&0x00ff0000u)|(i.y&0xff00ffffu);} - AU1 APermAGFE(AU2 i){return((i.x<<24)&0xff000000u)|(i.y&0x00ffffffu);} - AU1 APermCGFE(AU2 i){return((i.x<< 8)&0xff000000u)|(i.y&0x00ffffffu);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 APermGCEA(AU2 i){return((i.x)&0x00ff00ffu)|((i.y<<8)&0xff00ff00u);} - AU1 APermGECA(AU2 
i){return(((i.x)&0xffu)|((i.x>>8)&0xff00u)|((i.y<<16)&0xff0000u)|((i.y<<8)&0xff000000u));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [BUC] BYTE UNSIGNED CONVERSION -//------------------------------------------------------------------------------------------------------------------------------ -// Designed to use the optimal conversion, enables the scaling to possibly be factored into other computation. -// Works on a range of {0 to A_BUC_<32,16>}, for <32-bit, and 16-bit> respectively. -//------------------------------------------------------------------------------------------------------------------------------ -// OPCODE NOTES -// ============ -// GCN does not do UNORM or SNORM for bytes in opcodes. -// - V_CVT_F32_UBYTE{0,1,2,3} - Unsigned byte to float. -// - V_CVT_PKACC_U8_F32 - Float to unsigned byte (does bit-field insert into 32-bit integer). -// V_PERM_B32 does byte packing with ability to zero fill bytes as well. -// - Can pull out byte values from two sources, and zero fill upper 8-bits of packed hi and lo. -//------------------------------------------------------------------------------------------------------------------------------ -// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U1() - Designed for V_CVT_F32_UBYTE* and V_CVT_PKACCUM_U8_F32 ops. -// ==== ===== -// 0 : 0 -// 1 : 1 -// ... 
-// 255 : 255 -// : 256 (just outside the encoding range) -//------------------------------------------------------------------------------------------------------------------------------ -// BYTE : FLOAT - ABuc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. -// ==== ===== -// 0 : 0 -// 1 : 1/512 -// 2 : 1/256 -// ... -// 64 : 1/8 -// 128 : 1/4 -// 255 : 255/512 -// : 1/2 (just outside the encoding range) -//------------------------------------------------------------------------------------------------------------------------------ -// OPTIMAL IMPLEMENTATIONS ON AMD ARCHITECTURES -// ============================================ -// r=ABuc0FromU1(i) -// V_CVT_F32_UBYTE0 r,i -// -------------------------------------------- -// r=ABuc0ToU1(d,i) -// V_CVT_PKACCUM_U8_F32 r,i,0,d -// -------------------------------------------- -// d=ABuc0FromU2(i) -// Where 'k0' is an SGPR with 0x0E0A -// Where 'k1' is an SGPR with {32768.0} packed into the lower 16-bits -// V_PERM_B32 d,i.x,i.y,k0 -// V_PK_FMA_F16 d,d,k1.x,0 -// -------------------------------------------- -// r=ABuc0ToU2(d,i) -// Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits -// Where 'k1' is an SGPR with 0x???? -// Where 'k2' is an SGPR with 0x???? -// V_PK_FMA_F16 i,i,k0.x,0 -// V_PERM_B32 r.x,i,i,k1 -// V_PERM_B32 r.y,i,i,k2 -//============================================================================================================================== - // Peak range for 32-bit and 16-bit operations. - #define A_BUC_32 (255.0) - #define A_BUC_16 (255.0/512.0) -//============================================================================================================================== - #if 1 - // Designed to be one V_CVT_PKACCUM_U8_F32. - // The extra min is required to pattern match to V_CVT_PKACCUM_U8_F32. 
- AU1 ABuc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i),255u) )&(0x000000ffu));} - AU1 ABuc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i),255u)<< 8)&(0x0000ff00u));} - AU1 ABuc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i),255u)<<16)&(0x00ff0000u));} - AU1 ABuc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i),255u)<<24)&(0xff000000u));} -//------------------------------------------------------------------------------------------------------------------------------ - // Designed to be one V_CVT_F32_UBYTE*. - AF1 ABuc0FromU1(AU1 i){return AF1((i )&255u);} - AF1 ABuc1FromU1(AU1 i){return AF1((i>> 8)&255u);} - AF1 ABuc2FromU1(AU1 i){return AF1((i>>16)&255u);} - AF1 ABuc3FromU1(AU1 i){return AF1((i>>24)&255u);} - #endif -//============================================================================================================================== - #ifdef A_HALF - // Takes {x0,x1} and {y0,y1} and builds {{x0,y0},{x1,y1}}. - AW2 ABuc01ToW2(AH2 x,AH2 y){x*=AH2_(1.0/32768.0);y*=AH2_(1.0/32768.0); - return AW2_AU1(APermGCEA(AU2(AU1_AW2(AW2_AH2(x)),AU1_AW2(AW2_AH2(y)))));} -//------------------------------------------------------------------------------------------------------------------------------ - // Designed for 3 ops to do SOA to AOS and conversion. 
- AU2 ABuc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} - AU2 ABuc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} - AU2 ABuc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} - AU2 ABuc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0))); - return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} -//------------------------------------------------------------------------------------------------------------------------------ - // Designed for 2 ops to do both AOS to SOA, and conversion. - AH2 ABuc0FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)))*AH2_(32768.0);} - AH2 ABuc1FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)))*AH2_(32768.0);} - AH2 ABuc2FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)))*AH2_(32768.0);} - AH2 ABuc3FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)))*AH2_(32768.0);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [BSC] BYTE SIGNED CONVERSION -//------------------------------------------------------------------------------------------------------------------------------ -// Similar to [BUC]. -// Works on a range of {-/+ A_BSC_<32,16>}, for <32-bit, and 16-bit> respectively. 
-//------------------------------------------------------------------------------------------------------------------------------ -// ENCODING (without zero-based encoding) -// ======== -// 0 = unused (can be used to mean something else) -// 1 = lowest value -// 128 = exact zero center (zero based encoding -// 255 = highest value -//------------------------------------------------------------------------------------------------------------------------------ -// Zero-based [Zb] flips the MSB bit of the byte (making 128 "exact zero" actually zero). -// This is useful if there is a desire for cleared values to decode as zero. -//------------------------------------------------------------------------------------------------------------------------------ -// BYTE : FLOAT - ABsc{0,1,2,3}{To,From}U2() - Designed for 16-bit denormal tricks and V_PERM_B32. -// ==== ===== -// 0 : -127/512 (unused) -// 1 : -126/512 -// 2 : -125/512 -// ... -// 128 : 0 -// ... -// 255 : 127/512 -// : 1/4 (just outside the encoding range) -//============================================================================================================================== - // Peak range for 32-bit and 16-bit operations. 
- #define A_BSC_32 (127.0) - #define A_BSC_16 (127.0/512.0) -//============================================================================================================================== - #if 1 - AU1 ABsc0ToU1(AU1 d,AF1 i){return (d&0xffffff00u)|((min(AU1(i+128.0),255u) )&(0x000000ffu));} - AU1 ABsc1ToU1(AU1 d,AF1 i){return (d&0xffff00ffu)|((min(AU1(i+128.0),255u)<< 8)&(0x0000ff00u));} - AU1 ABsc2ToU1(AU1 d,AF1 i){return (d&0xff00ffffu)|((min(AU1(i+128.0),255u)<<16)&(0x00ff0000u));} - AU1 ABsc3ToU1(AU1 d,AF1 i){return (d&0x00ffffffu)|((min(AU1(i+128.0),255u)<<24)&(0xff000000u));} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 ABsc0ToZbU1(AU1 d,AF1 i){return ((d&0xffffff00u)|((min(AU1(trunc(i)+128.0),255u) )&(0x000000ffu)))^0x00000080u;} - AU1 ABsc1ToZbU1(AU1 d,AF1 i){return ((d&0xffff00ffu)|((min(AU1(trunc(i)+128.0),255u)<< 8)&(0x0000ff00u)))^0x00008000u;} - AU1 ABsc2ToZbU1(AU1 d,AF1 i){return ((d&0xff00ffffu)|((min(AU1(trunc(i)+128.0),255u)<<16)&(0x00ff0000u)))^0x00800000u;} - AU1 ABsc3ToZbU1(AU1 d,AF1 i){return ((d&0x00ffffffu)|((min(AU1(trunc(i)+128.0),255u)<<24)&(0xff000000u)))^0x80000000u;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 ABsc0FromU1(AU1 i){return AF1((i )&255u)-128.0;} - AF1 ABsc1FromU1(AU1 i){return AF1((i>> 8)&255u)-128.0;} - AF1 ABsc2FromU1(AU1 i){return AF1((i>>16)&255u)-128.0;} - AF1 ABsc3FromU1(AU1 i){return AF1((i>>24)&255u)-128.0;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 ABsc0FromZbU1(AU1 i){return AF1(((i )&255u)^0x80u)-128.0;} - AF1 ABsc1FromZbU1(AU1 i){return AF1(((i>> 8)&255u)^0x80u)-128.0;} - AF1 ABsc2FromZbU1(AU1 i){return AF1(((i>>16)&255u)^0x80u)-128.0;} - AF1 ABsc3FromZbU1(AU1 i){return AF1(((i>>24)&255u)^0x80u)-128.0;} - #endif 
-//============================================================================================================================== - #ifdef A_HALF - // Takes {x0,x1} and {y0,y1} and builds {{x0,y0},{x1,y1}}. - AW2 ABsc01ToW2(AH2 x,AH2 y){x=x*AH2_(1.0/32768.0)+AH2_(0.25/32768.0);y=y*AH2_(1.0/32768.0)+AH2_(0.25/32768.0); - return AW2_AU1(APermGCEA(AU2(AU1_AW2(AW2_AH2(x)),AU1_AW2(AW2_AH2(y)))));} -//------------------------------------------------------------------------------------------------------------------------------ - AU2 ABsc0ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} - AU2 ABsc1ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} - AU2 ABsc2ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} - AU2 ABsc3ToU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0))); - return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} -//------------------------------------------------------------------------------------------------------------------------------ - AU2 ABsc0ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermHGFA(AU2(d.x,b)),APermHGFC(AU2(d.y,b)));} - AU2 ABsc1ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermHGAE(AU2(d.x,b)),APermHGCE(AU2(d.y,b)));} - AU2 ABsc2ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermHAFE(AU2(d.x,b)),APermHCFE(AU2(d.y,b)));} - AU2 ABsc3ToZbU2(AU2 d,AH2 i){AU1 b=AU1_AW2(AW2_AH2(i*AH2_(1.0/32768.0)+AH2_(0.25/32768.0)))^0x00800080u; - return AU2(APermAGFE(AU2(d.x,b)),APermCGFE(AU2(d.y,b)));} 
-//------------------------------------------------------------------------------------------------------------------------------ - AH2 ABsc0FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc1FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc2FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc3FromU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)))*AH2_(32768.0)-AH2_(0.25);} -//------------------------------------------------------------------------------------------------------------------------------ - AH2 ABsc0FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0E0A(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc1FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0F0B(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc2FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0G0C(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - AH2 ABsc3FromZbU2(AU2 i){return AH2_AW2(AW2_AU1(APerm0H0D(i)^0x00800080u))*AH2_(32768.0)-AH2_(0.25);} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// HALF APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// These support only positive inputs. -// Did not see value yet in specialization for range. -// Using quick testing, ended up mostly getting the same "best" approximation for various ranges. 
-// With hardware that can co-execute transcendentals, the value in approximations could be less than expected. -// However from a latency perspective, if execution of a transcendental is 4 clk, with no packed support, -> 8 clk total. -// And co-execution would require a compiler interleaving a lot of independent work for packed usage. -//------------------------------------------------------------------------------------------------------------------------------ -// The one Newton Raphson iteration form of rsq() was skipped (requires 6 ops total). -// Same with sqrt(), as this could be x*rsq() (7 ops). -//============================================================================================================================== - #ifdef A_HALF - // Minimize squared error across full positive range, 2 ops. - // The 0x1de2 based approximation maps {0 to 1} input maps to < 1 output. - AH1 APrxLoSqrtH1(AH1 a){return AH1_AW1((AW1_AH1(a)>>AW1_(1))+AW1_(0x1de2));} - AH2 APrxLoSqrtH2(AH2 a){return AH2_AW2((AW2_AH2(a)>>AW2_(1))+AW2_(0x1de2));} - AH3 APrxLoSqrtH3(AH3 a){return AH3_AW3((AW3_AH3(a)>>AW3_(1))+AW3_(0x1de2));} - AH4 APrxLoSqrtH4(AH4 a){return AH4_AW4((AW4_AH4(a)>>AW4_(1))+AW4_(0x1de2));} -//------------------------------------------------------------------------------------------------------------------------------ - // Lower precision estimation, 1 op. - // Minimize squared error across {smallest normal to 16384.0}. - AH1 APrxLoRcpH1(AH1 a){return AH1_AW1(AW1_(0x7784)-AW1_AH1(a));} - AH2 APrxLoRcpH2(AH2 a){return AH2_AW2(AW2_(0x7784)-AW2_AH2(a));} - AH3 APrxLoRcpH3(AH3 a){return AH3_AW3(AW3_(0x7784)-AW3_AH3(a));} - AH4 APrxLoRcpH4(AH4 a){return AH4_AW4(AW4_(0x7784)-AW4_AH4(a));} -//------------------------------------------------------------------------------------------------------------------------------ - // Medium precision estimation, one Newton Raphson iteration, 3 ops. 
- AH1 APrxMedRcpH1(AH1 a){AH1 b=AH1_AW1(AW1_(0x778d)-AW1_AH1(a));return b*(-b*a+AH1_(2.0));} - AH2 APrxMedRcpH2(AH2 a){AH2 b=AH2_AW2(AW2_(0x778d)-AW2_AH2(a));return b*(-b*a+AH2_(2.0));} - AH3 APrxMedRcpH3(AH3 a){AH3 b=AH3_AW3(AW3_(0x778d)-AW3_AH3(a));return b*(-b*a+AH3_(2.0));} - AH4 APrxMedRcpH4(AH4 a){AH4 b=AH4_AW4(AW4_(0x778d)-AW4_AH4(a));return b*(-b*a+AH4_(2.0));} -//------------------------------------------------------------------------------------------------------------------------------ - // Minimize squared error across {smallest normal to 16384.0}, 2 ops. - AH1 APrxLoRsqH1(AH1 a){return AH1_AW1(AW1_(0x59a3)-(AW1_AH1(a)>>AW1_(1)));} - AH2 APrxLoRsqH2(AH2 a){return AH2_AW2(AW2_(0x59a3)-(AW2_AH2(a)>>AW2_(1)));} - AH3 APrxLoRsqH3(AH3 a){return AH3_AW3(AW3_(0x59a3)-(AW3_AH3(a)>>AW3_(1)));} - AH4 APrxLoRsqH4(AH4 a){return AH4_AW4(AW4_(0x59a3)-(AW4_AH4(a)>>AW4_(1)));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// FLOAT APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// Michal Drobot has an excellent presentation on these: "Low Level Optimizations For GCN", -// - Idea dates back to SGI, then to Quake 3, etc. 
-// - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf -// - sqrt(x)=rsqrt(x)*x -// - rcp(x)=rsqrt(x)*rsqrt(x) for positive x -// - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h -//------------------------------------------------------------------------------------------------------------------------------ -// These below are from perhaps less complete searching for optimal. -// Used FP16 normal range for testing with +4096 32-bit step size for sampling error. -// So these match up well with the half approximations. -//============================================================================================================================== - AF1 APrxLoSqrtF1(AF1 a){return AF1_AU1((AU1_AF1(a)>>AU1_(1))+AU1_(0x1fbc4639));} - AF1 APrxLoRcpF1(AF1 a){return AF1_AU1(AU1_(0x7ef07ebb)-AU1_AF1(a));} - AF1 APrxMedRcpF1(AF1 a){AF1 b=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));return b*(-b*a+AF1_(2.0));} - AF1 APrxLoRsqF1(AF1 a){return AF1_AU1(AU1_(0x5f347d74)-(AU1_AF1(a)>>AU1_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 APrxLoSqrtF2(AF2 a){return AF2_AU2((AU2_AF2(a)>>AU2_(1))+AU2_(0x1fbc4639));} - AF2 APrxLoRcpF2(AF2 a){return AF2_AU2(AU2_(0x7ef07ebb)-AU2_AF2(a));} - AF2 APrxMedRcpF2(AF2 a){AF2 b=AF2_AU2(AU2_(0x7ef19fff)-AU2_AF2(a));return b*(-b*a+AF2_(2.0));} - AF2 APrxLoRsqF2(AF2 a){return AF2_AU2(AU2_(0x5f347d74)-(AU2_AF2(a)>>AU2_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - AF3 APrxLoSqrtF3(AF3 a){return AF3_AU3((AU3_AF3(a)>>AU3_(1))+AU3_(0x1fbc4639));} - AF3 APrxLoRcpF3(AF3 a){return AF3_AU3(AU3_(0x7ef07ebb)-AU3_AF3(a));} - AF3 APrxMedRcpF3(AF3 a){AF3 b=AF3_AU3(AU3_(0x7ef19fff)-AU3_AF3(a));return b*(-b*a+AF3_(2.0));} - AF3 APrxLoRsqF3(AF3 a){return AF3_AU3(AU3_(0x5f347d74)-(AU3_AF3(a)>>AU3_(1)));} 
-//------------------------------------------------------------------------------------------------------------------------------ - AF4 APrxLoSqrtF4(AF4 a){return AF4_AU4((AU4_AF4(a)>>AU4_(1))+AU4_(0x1fbc4639));} - AF4 APrxLoRcpF4(AF4 a){return AF4_AU4(AU4_(0x7ef07ebb)-AU4_AF4(a));} - AF4 APrxMedRcpF4(AF4 a){AF4 b=AF4_AU4(AU4_(0x7ef19fff)-AU4_AF4(a));return b*(-b*a+AF4_(2.0));} - AF4 APrxLoRsqF4(AF4 a){return AF4_AU4(AU4_(0x5f347d74)-(AU4_AF4(a)>>AU4_(1)));} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PQ APPROXIMATIONS -//------------------------------------------------------------------------------------------------------------------------------ -// PQ is very close to x^(1/8). The functions below Use the fast float approximation method to do -// PQ<~>Gamma2 (4th power and fast 4th root) and PQ<~>Linear (8th power and fast 8th root). Maximum error is ~0.2%. 
-//============================================================================================================================== -// Helpers - AF1 Quart(AF1 a) { a = a * a; return a * a;} - AF1 Oct(AF1 a) { a = a * a; a = a * a; return a * a; } - AF2 Quart(AF2 a) { a = a * a; return a * a; } - AF2 Oct(AF2 a) { a = a * a; a = a * a; return a * a; } - AF3 Quart(AF3 a) { a = a * a; return a * a; } - AF3 Oct(AF3 a) { a = a * a; a = a * a; return a * a; } - AF4 Quart(AF4 a) { a = a * a; return a * a; } - AF4 Oct(AF4 a) { a = a * a; a = a * a; return a * a; } - //------------------------------------------------------------------------------------------------------------------------------ - AF1 APrxPQToGamma2(AF1 a) { return Quart(a); } - AF1 APrxPQToLinear(AF1 a) { return Oct(a); } - AF1 APrxLoGamma2ToPQ(AF1 a) { return AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); } - AF1 APrxMedGamma2ToPQ(AF1 a) { AF1 b = AF1_AU1((AU1_AF1(a) >> AU1_(2)) + AU1_(0x2F9A4E46)); AF1 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF1 APrxHighGamma2ToPQ(AF1 a) { return sqrt(sqrt(a)); } - AF1 APrxLoLinearToPQ(AF1 a) { return AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); } - AF1 APrxMedLinearToPQ(AF1 a) { AF1 b = AF1_AU1((AU1_AF1(a) >> AU1_(3)) + AU1_(0x378D8723)); AF1 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF1 APrxHighLinearToPQ(AF1 a) { return sqrt(sqrt(sqrt(a))); } - //------------------------------------------------------------------------------------------------------------------------------ - AF2 APrxPQToGamma2(AF2 a) { return Quart(a); } - AF2 APrxPQToLinear(AF2 a) { return Oct(a); } - AF2 APrxLoGamma2ToPQ(AF2 a) { return AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); } - AF2 APrxMedGamma2ToPQ(AF2 a) { AF2 b = AF2_AU2((AU2_AF2(a) >> AU2_(2)) + AU2_(0x2F9A4E46)); AF2 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF2 APrxHighGamma2ToPQ(AF2 a) { return sqrt(sqrt(a)); } - AF2 APrxLoLinearToPQ(AF2 a) { return 
AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); } - AF2 APrxMedLinearToPQ(AF2 a) { AF2 b = AF2_AU2((AU2_AF2(a) >> AU2_(3)) + AU2_(0x378D8723)); AF2 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF2 APrxHighLinearToPQ(AF2 a) { return sqrt(sqrt(sqrt(a))); } - //------------------------------------------------------------------------------------------------------------------------------ - AF3 APrxPQToGamma2(AF3 a) { return Quart(a); } - AF3 APrxPQToLinear(AF3 a) { return Oct(a); } - AF3 APrxLoGamma2ToPQ(AF3 a) { return AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46)); } - AF3 APrxMedGamma2ToPQ(AF3 a) { AF3 b = AF3_AU3((AU3_AF3(a) >> AU3_(2)) + AU3_(0x2F9A4E46)); AF3 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF3 APrxHighGamma2ToPQ(AF3 a) { return sqrt(sqrt(a)); } - AF3 APrxLoLinearToPQ(AF3 a) { return AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723)); } - AF3 APrxMedLinearToPQ(AF3 a) { AF3 b = AF3_AU3((AU3_AF3(a) >> AU3_(3)) + AU3_(0x378D8723)); AF3 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF3 APrxHighLinearToPQ(AF3 a) { return sqrt(sqrt(sqrt(a))); } - //------------------------------------------------------------------------------------------------------------------------------ - AF4 APrxPQToGamma2(AF4 a) { return Quart(a); } - AF4 APrxPQToLinear(AF4 a) { return Oct(a); } - AF4 APrxLoGamma2ToPQ(AF4 a) { return AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46)); } - AF4 APrxMedGamma2ToPQ(AF4 a) { AF4 b = AF4_AU4((AU4_AF4(a) >> AU4_(2)) + AU4_(0x2F9A4E46)); AF4 b4 = Quart(b); return b - b * (b4 - a) / (AF1_(4.0) * b4); } - AF4 APrxHighGamma2ToPQ(AF4 a) { return sqrt(sqrt(a)); } - AF4 APrxLoLinearToPQ(AF4 a) { return AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723)); } - AF4 APrxMedLinearToPQ(AF4 a) { AF4 b = AF4_AU4((AU4_AF4(a) >> AU4_(3)) + AU4_(0x378D8723)); AF4 b8 = Oct(b); return b - b * (b8 - a) / (AF1_(8.0) * b8); } - AF4 APrxHighLinearToPQ(AF4 a) { return sqrt(sqrt(sqrt(a))); } 
-//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PARABOLIC SIN & COS -//------------------------------------------------------------------------------------------------------------------------------ -// Approximate answers to transcendental questions. -//------------------------------------------------------------------------------------------------------------------------------ -//============================================================================================================================== - #if 1 - // Valid input range is {-1 to 1} representing {0 to 2 pi}. - // Output range is {-1/4 to 1/4} representing {-1 to 1}. - AF1 APSinF1(AF1 x){return x*abs(x)-x;} // MAD. - AF2 APSinF2(AF2 x){return x*abs(x)-x;} - AF1 APCosF1(AF1 x){x=AFractF1(x*AF1_(0.5)+AF1_(0.75));x=x*AF1_(2.0)-AF1_(1.0);return APSinF1(x);} // 3x MAD, FRACT - AF2 APCosF2(AF2 x){x=AFractF2(x*AF2_(0.5)+AF2_(0.75));x=x*AF2_(2.0)-AF2_(1.0);return APSinF2(x);} - AF2 APSinCosF1(AF1 x){AF1 y=AFractF1(x*AF1_(0.5)+AF1_(0.75));y=y*AF1_(2.0)-AF1_(1.0);return APSinF2(AF2(x,y));} - #endif -//------------------------------------------------------------------------------------------------------------------------------ - #ifdef A_HALF - // For a packed {sin,cos} pair, - // - Native takes 16 clocks and 4 issue slots (no packed transcendentals). - // - Parabolic takes 8 clocks and 8 issue slots (only fract is non-packed). 
- AH1 APSinH1(AH1 x){return x*abs(x)-x;} - AH2 APSinH2(AH2 x){return x*abs(x)-x;} // AND,FMA - AH1 APCosH1(AH1 x){x=AFractH1(x*AH1_(0.5)+AH1_(0.75));x=x*AH1_(2.0)-AH1_(1.0);return APSinH1(x);} - AH2 APCosH2(AH2 x){x=AFractH2(x*AH2_(0.5)+AH2_(0.75));x=x*AH2_(2.0)-AH2_(1.0);return APSinH2(x);} // 3x FMA, 2xFRACT, AND - AH2 APSinCosH1(AH1 x){AH1 y=AFractH1(x*AH1_(0.5)+AH1_(0.75));y=y*AH1_(2.0)-AH1_(1.0);return APSinH2(AH2(x,y));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// [ZOL] ZERO ONE LOGIC -//------------------------------------------------------------------------------------------------------------------------------ -// Conditional free logic designed for easy 16-bit packing, and backwards porting to 32-bit. -//------------------------------------------------------------------------------------------------------------------------------ -// 0 := false -// 1 := true -//------------------------------------------------------------------------------------------------------------------------------ -// AndNot(x,y) -> !(x&y) .... One op. -// AndOr(x,y,z) -> (x&y)|z ... One op. -// GtZero(x) -> x>0.0 ..... One op. -// Sel(x,y,z) -> x?y:z ..... Two ops, has no precision loss. -// Signed(x) -> x<0.0 ..... One op. -// ZeroPass(x,y) -> x?0:y ..... Two ops, 'y' is a pass through safe for aliasing as integer. 
-//------------------------------------------------------------------------------------------------------------------------------ -// OPTIMIZATION NOTES -// ================== -// - On Vega to use 2 constants in a packed op, pass in as one AW2 or one AH2 'k.xy' and use as 'k.xx' and 'k.yy'. -// For example 'a.xy*k.xx+k.yy'. -//============================================================================================================================== - #if 1 - AU1 AZolAndU1(AU1 x,AU1 y){return min(x,y);} - AU2 AZolAndU2(AU2 x,AU2 y){return min(x,y);} - AU3 AZolAndU3(AU3 x,AU3 y){return min(x,y);} - AU4 AZolAndU4(AU4 x,AU4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AZolNotU1(AU1 x){return x^AU1_(1);} - AU2 AZolNotU2(AU2 x){return x^AU2_(1);} - AU3 AZolNotU3(AU3 x){return x^AU3_(1);} - AU4 AZolNotU4(AU4 x){return x^AU4_(1);} -//------------------------------------------------------------------------------------------------------------------------------ - AU1 AZolOrU1(AU1 x,AU1 y){return max(x,y);} - AU2 AZolOrU2(AU2 x,AU2 y){return max(x,y);} - AU3 AZolOrU3(AU3 x,AU3 y){return max(x,y);} - AU4 AZolOrU4(AU4 x,AU4 y){return max(x,y);} -//============================================================================================================================== - AU1 AZolF1ToU1(AF1 x){return AU1(x);} - AU2 AZolF2ToU2(AF2 x){return AU2(x);} - AU3 AZolF3ToU3(AF3 x){return AU3(x);} - AU4 AZolF4ToU4(AF4 x){return AU4(x);} -//------------------------------------------------------------------------------------------------------------------------------ - // 2 ops, denormals don't work in 32-bit on PC (and if they are enabled, OMOD is disabled). 
- AU1 AZolNotF1ToU1(AF1 x){return AU1(AF1_(1.0)-x);} - AU2 AZolNotF2ToU2(AF2 x){return AU2(AF2_(1.0)-x);} - AU3 AZolNotF3ToU3(AF3 x){return AU3(AF3_(1.0)-x);} - AU4 AZolNotF4ToU4(AF4 x){return AU4(AF4_(1.0)-x);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolU1ToF1(AU1 x){return AF1(x);} - AF2 AZolU2ToF2(AU2 x){return AF2(x);} - AF3 AZolU3ToF3(AU3 x){return AF3(x);} - AF4 AZolU4ToF4(AU4 x){return AF4(x);} -//============================================================================================================================== - AF1 AZolAndF1(AF1 x,AF1 y){return min(x,y);} - AF2 AZolAndF2(AF2 x,AF2 y){return min(x,y);} - AF3 AZolAndF3(AF3 x,AF3 y){return min(x,y);} - AF4 AZolAndF4(AF4 x,AF4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 ASolAndNotF1(AF1 x,AF1 y){return (-x)*y+AF1_(1.0);} - AF2 ASolAndNotF2(AF2 x,AF2 y){return (-x)*y+AF2_(1.0);} - AF3 ASolAndNotF3(AF3 x,AF3 y){return (-x)*y+AF3_(1.0);} - AF4 ASolAndNotF4(AF4 x,AF4 y){return (-x)*y+AF4_(1.0);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolAndOrF1(AF1 x,AF1 y,AF1 z){return ASatF1(x*y+z);} - AF2 AZolAndOrF2(AF2 x,AF2 y,AF2 z){return ASatF2(x*y+z);} - AF3 AZolAndOrF3(AF3 x,AF3 y,AF3 z){return ASatF3(x*y+z);} - AF4 AZolAndOrF4(AF4 x,AF4 y,AF4 z){return ASatF4(x*y+z);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolGtZeroF1(AF1 x){return ASatF1(x*AF1_(A_INFP_F));} - AF2 AZolGtZeroF2(AF2 x){return ASatF2(x*AF2_(A_INFP_F));} - AF3 AZolGtZeroF3(AF3 x){return ASatF3(x*AF3_(A_INFP_F));} - AF4 AZolGtZeroF4(AF4 x){return ASatF4(x*AF4_(A_INFP_F));} 
-//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolNotF1(AF1 x){return AF1_(1.0)-x;} - AF2 AZolNotF2(AF2 x){return AF2_(1.0)-x;} - AF3 AZolNotF3(AF3 x){return AF3_(1.0)-x;} - AF4 AZolNotF4(AF4 x){return AF4_(1.0)-x;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolOrF1(AF1 x,AF1 y){return max(x,y);} - AF2 AZolOrF2(AF2 x,AF2 y){return max(x,y);} - AF3 AZolOrF3(AF3 x,AF3 y){return max(x,y);} - AF4 AZolOrF4(AF4 x,AF4 y){return max(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolSelF1(AF1 x,AF1 y,AF1 z){AF1 r=(-x)*z+z;return x*y+r;} - AF2 AZolSelF2(AF2 x,AF2 y,AF2 z){AF2 r=(-x)*z+z;return x*y+r;} - AF3 AZolSelF3(AF3 x,AF3 y,AF3 z){AF3 r=(-x)*z+z;return x*y+r;} - AF4 AZolSelF4(AF4 x,AF4 y,AF4 z){AF4 r=(-x)*z+z;return x*y+r;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolSignedF1(AF1 x){return ASatF1(x*AF1_(A_INFN_F));} - AF2 AZolSignedF2(AF2 x){return ASatF2(x*AF2_(A_INFN_F));} - AF3 AZolSignedF3(AF3 x){return ASatF3(x*AF3_(A_INFN_F));} - AF4 AZolSignedF4(AF4 x){return ASatF4(x*AF4_(A_INFN_F));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AZolZeroPassF1(AF1 x,AF1 y){return AF1_AU1((AU1_AF1(x)!=AU1_(0))?AU1_(0):AU1_AF1(y));} - AF2 AZolZeroPassF2(AF2 x,AF2 y){return AF2_AU2((AU2_AF2(x)!=AU2_(0))?AU2_(0):AU2_AF2(y));} - AF3 AZolZeroPassF3(AF3 x,AF3 y){return AF3_AU3((AU3_AF3(x)!=AU3_(0))?AU3_(0):AU3_AF3(y));} - AF4 AZolZeroPassF4(AF4 x,AF4 y){return AF4_AU4((AU4_AF4(x)!=AU4_(0))?AU4_(0):AU4_AF4(y));} - #endif 
-//============================================================================================================================== - #ifdef A_HALF - AW1 AZolAndW1(AW1 x,AW1 y){return min(x,y);} - AW2 AZolAndW2(AW2 x,AW2 y){return min(x,y);} - AW3 AZolAndW3(AW3 x,AW3 y){return min(x,y);} - AW4 AZolAndW4(AW4 x,AW4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AZolNotW1(AW1 x){return x^AW1_(1);} - AW2 AZolNotW2(AW2 x){return x^AW2_(1);} - AW3 AZolNotW3(AW3 x){return x^AW3_(1);} - AW4 AZolNotW4(AW4 x){return x^AW4_(1);} -//------------------------------------------------------------------------------------------------------------------------------ - AW1 AZolOrW1(AW1 x,AW1 y){return max(x,y);} - AW2 AZolOrW2(AW2 x,AW2 y){return max(x,y);} - AW3 AZolOrW3(AW3 x,AW3 y){return max(x,y);} - AW4 AZolOrW4(AW4 x,AW4 y){return max(x,y);} -//============================================================================================================================== - // Uses denormal trick. - AW1 AZolH1ToW1(AH1 x){return AW1_AH1(x*AH1_AW1(AW1_(1)));} - AW2 AZolH2ToW2(AH2 x){return AW2_AH2(x*AH2_AW2(AW2_(1)));} - AW3 AZolH3ToW3(AH3 x){return AW3_AH3(x*AH3_AW3(AW3_(1)));} - AW4 AZolH4ToW4(AH4 x){return AW4_AH4(x*AH4_AW4(AW4_(1)));} -//------------------------------------------------------------------------------------------------------------------------------ - // AMD arch lacks a packed conversion opcode. 
- AH1 AZolW1ToH1(AW1 x){return AH1_AW1(x*AW1_AH1(AH1_(1.0)));} - AH2 AZolW2ToH2(AW2 x){return AH2_AW2(x*AW2_AH2(AH2_(1.0)));} - AH3 AZolW1ToH3(AW3 x){return AH3_AW3(x*AW3_AH3(AH3_(1.0)));} - AH4 AZolW2ToH4(AW4 x){return AH4_AW4(x*AW4_AH4(AH4_(1.0)));} -//============================================================================================================================== - AH1 AZolAndH1(AH1 x,AH1 y){return min(x,y);} - AH2 AZolAndH2(AH2 x,AH2 y){return min(x,y);} - AH3 AZolAndH3(AH3 x,AH3 y){return min(x,y);} - AH4 AZolAndH4(AH4 x,AH4 y){return min(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 ASolAndNotH1(AH1 x,AH1 y){return (-x)*y+AH1_(1.0);} - AH2 ASolAndNotH2(AH2 x,AH2 y){return (-x)*y+AH2_(1.0);} - AH3 ASolAndNotH3(AH3 x,AH3 y){return (-x)*y+AH3_(1.0);} - AH4 ASolAndNotH4(AH4 x,AH4 y){return (-x)*y+AH4_(1.0);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolAndOrH1(AH1 x,AH1 y,AH1 z){return ASatH1(x*y+z);} - AH2 AZolAndOrH2(AH2 x,AH2 y,AH2 z){return ASatH2(x*y+z);} - AH3 AZolAndOrH3(AH3 x,AH3 y,AH3 z){return ASatH3(x*y+z);} - AH4 AZolAndOrH4(AH4 x,AH4 y,AH4 z){return ASatH4(x*y+z);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolGtZeroH1(AH1 x){return ASatH1(x*AH1_(A_INFP_H));} - AH2 AZolGtZeroH2(AH2 x){return ASatH2(x*AH2_(A_INFP_H));} - AH3 AZolGtZeroH3(AH3 x){return ASatH3(x*AH3_(A_INFP_H));} - AH4 AZolGtZeroH4(AH4 x){return ASatH4(x*AH4_(A_INFP_H));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolNotH1(AH1 x){return AH1_(1.0)-x;} - AH2 AZolNotH2(AH2 x){return AH2_(1.0)-x;} - AH3 AZolNotH3(AH3 x){return AH3_(1.0)-x;} - AH4 AZolNotH4(AH4 x){return AH4_(1.0)-x;} 
-//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolOrH1(AH1 x,AH1 y){return max(x,y);} - AH2 AZolOrH2(AH2 x,AH2 y){return max(x,y);} - AH3 AZolOrH3(AH3 x,AH3 y){return max(x,y);} - AH4 AZolOrH4(AH4 x,AH4 y){return max(x,y);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolSelH1(AH1 x,AH1 y,AH1 z){AH1 r=(-x)*z+z;return x*y+r;} - AH2 AZolSelH2(AH2 x,AH2 y,AH2 z){AH2 r=(-x)*z+z;return x*y+r;} - AH3 AZolSelH3(AH3 x,AH3 y,AH3 z){AH3 r=(-x)*z+z;return x*y+r;} - AH4 AZolSelH4(AH4 x,AH4 y,AH4 z){AH4 r=(-x)*z+z;return x*y+r;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AZolSignedH1(AH1 x){return ASatH1(x*AH1_(A_INFN_H));} - AH2 AZolSignedH2(AH2 x){return ASatH2(x*AH2_(A_INFN_H));} - AH3 AZolSignedH3(AH3 x){return ASatH3(x*AH3_(A_INFN_H));} - AH4 AZolSignedH4(AH4 x){return ASatH4(x*AH4_(A_INFN_H));} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// COLOR CONVERSIONS -//------------------------------------------------------------------------------------------------------------------------------ -// These are all linear to/from some other space (where 'linear' has been shortened out of the function name). -// So 'ToGamma' is 'LinearToGamma', and 'FromGamma' is 'LinearFromGamma'. -// These are branch free implementations. 
-// The AToSrgbF1() function is useful for stores for compute shaders for GPUs without hardware linear->sRGB store conversion. -//------------------------------------------------------------------------------------------------------------------------------ -// TRANSFER FUNCTIONS -// ================== -// 709 ..... Rec709 used for some HDTVs -// Gamma ... Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native -// Pq ...... PQ native for HDR10 -// Srgb .... The sRGB output, typical of PC displays, useful for 10-bit output, or storing to 8-bit UNORM without SRGB type -// Two ..... Gamma 2.0, fastest conversion (useful for intermediate pass approximations) -// Three ... Gamma 3.0, less fast, but good for HDR. -//------------------------------------------------------------------------------------------------------------------------------ -// KEEPING TO SPEC -// =============== -// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times. -// (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range). -// (b.) For 8-bit 709, steps {0 to 20.7} are in the linear region (8% of the encoding range). -// Also there is a slight step in the transition regions. -// Precision of the coefficients in the spec being the likely cause. -// Main usage case of the sRGB code is to do the linear->sRGB converstion in a compute shader before store. -// This is to work around lack of hardware (typically only ROP does the conversion for free). -// To "correct" the linear segment, would be to introduce error, because hardware decode of sRGB->linear is fixed (and free). -// So this header keeps with the spec. -// For linear->sRGB transforms, the linear segment in some respects reduces error, because rounding in that region is linear. -// Rounding in the curved region in hardware (and fast software code) introduces error due to rounding in non-linear. 
-//------------------------------------------------------------------------------------------------------------------------------ -// FOR PQ -// ====== -// Both input and output is {0.0-1.0}, and where output 1.0 represents 10000.0 cd/m^2. -// All constants are only specified to FP32 precision. -// External PQ source reference, -// - https://github.com/ampas/aces-dev/blob/master/transforms/ctl/utilities/ACESlib.Utilities_Color.a1.0.1.ctl -//------------------------------------------------------------------------------------------------------------------------------ -// PACKED VERSIONS -// =============== -// These are the A*H2() functions. -// There is no PQ functions as FP16 seemed to not have enough precision for the conversion. -// The remaining functions are "good enough" for 8-bit, and maybe 10-bit if not concerned about a few 1-bit errors. -// Precision is lowest in the 709 conversion, higher in sRGB, higher still in Two and Gamma (when using 2.2 at least). -//------------------------------------------------------------------------------------------------------------------------------ -// NOTES -// ===== -// Could be faster for PQ conversions to be in ALU or a texture lookup depending on usage case. -//============================================================================================================================== - #if 1 - AF1 ATo709F1(AF1 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AF2 ATo709F2(AF2 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AF3 ATo709F3(AF3 c){AF3 j=AF3(0.018*4.5,4.5,0.45);AF2 k=AF2(1.099,-0.099); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - // Note 'rcpX' is '1/x', where the 'x' is what would be used in AFromGamma(). 
- AF1 AToGammaF1(AF1 c,AF1 rcpX){return pow(c,AF1_(rcpX));} - AF2 AToGammaF2(AF2 c,AF1 rcpX){return pow(c,AF2_(rcpX));} - AF3 AToGammaF3(AF3 c,AF1 rcpX){return pow(c,AF3_(rcpX));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToPqF1(AF1 x){AF1 p=pow(x,AF1_(0.159302)); - return pow((AF1_(0.835938)+AF1_(18.8516)*p)/(AF1_(1.0)+AF1_(18.6875)*p),AF1_(78.8438));} - AF2 AToPqF1(AF2 x){AF2 p=pow(x,AF2_(0.159302)); - return pow((AF2_(0.835938)+AF2_(18.8516)*p)/(AF2_(1.0)+AF2_(18.6875)*p),AF2_(78.8438));} - AF3 AToPqF1(AF3 x){AF3 p=pow(x,AF3_(0.159302)); - return pow((AF3_(0.835938)+AF3_(18.8516)*p)/(AF3_(1.0)+AF3_(18.6875)*p),AF3_(78.8438));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToSrgbF1(AF1 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AF2 AToSrgbF2(AF2 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AF3 AToSrgbF3(AF3 c){AF3 j=AF3(0.0031308*12.92,12.92,1.0/2.4);AF2 k=AF2(1.055,-0.055); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToTwoF1(AF1 c){return sqrt(c);} - AF2 AToTwoF2(AF2 c){return sqrt(c);} - AF3 AToTwoF3(AF3 c){return sqrt(c);} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AToThreeF1(AF1 c){return pow(c,AF1_(1.0/3.0));} - AF2 AToThreeF2(AF2 c){return pow(c,AF2_(1.0/3.0));} - AF3 AToThreeF3(AF3 c){return pow(c,AF3_(1.0/3.0));} - #endif -//============================================================================================================================== - #if 1 - // 
Unfortunately median won't work here. - AF1 AFrom709F1(AF1 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); - return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AF2 AFrom709F2(AF2 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); - return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AF3 AFrom709F3(AF3 c){AF3 j=AF3(0.081/4.5,1.0/4.5,1.0/0.45);AF2 k=AF2(1.0/1.099,0.099/1.099); - return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromGammaF1(AF1 c,AF1 x){return pow(c,AF1_(x));} - AF2 AFromGammaF2(AF2 c,AF1 x){return pow(c,AF2_(x));} - AF3 AFromGammaF3(AF3 c,AF1 x){return pow(c,AF3_(x));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromPqF1(AF1 x){AF1 p=pow(x,AF1_(0.0126833)); - return pow(ASatF1(p-AF1_(0.835938))/(AF1_(18.8516)-AF1_(18.6875)*p),AF1_(6.27739));} - AF2 AFromPqF1(AF2 x){AF2 p=pow(x,AF2_(0.0126833)); - return pow(ASatF2(p-AF2_(0.835938))/(AF2_(18.8516)-AF2_(18.6875)*p),AF2_(6.27739));} - AF3 AFromPqF1(AF3 x){AF3 p=pow(x,AF3_(0.0126833)); - return pow(ASatF3(p-AF3_(0.835938))/(AF3_(18.8516)-AF3_(18.6875)*p),AF3_(6.27739));} -//------------------------------------------------------------------------------------------------------------------------------ - // Unfortunately median won't work here. 
- AF1 AFromSrgbF1(AF1 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); - return AZolSelF1(AZolSignedF1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AF2 AFromSrgbF2(AF2 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); - return AZolSelF2(AZolSignedF2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AF3 AFromSrgbF3(AF3 c){AF3 j=AF3(0.04045/12.92,1.0/12.92,2.4);AF2 k=AF2(1.0/1.055,0.055/1.055); - return AZolSelF3(AZolSignedF3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromTwoF1(AF1 c){return c*c;} - AF2 AFromTwoF2(AF2 c){return c*c;} - AF3 AFromTwoF3(AF3 c){return c*c;} -//------------------------------------------------------------------------------------------------------------------------------ - AF1 AFromThreeF1(AF1 c){return c*c*c;} - AF2 AFromThreeF2(AF2 c){return c*c*c;} - AF3 AFromThreeF3(AF3 c){return c*c*c;} - #endif -//============================================================================================================================== - #ifdef A_HALF - AH1 ATo709H1(AH1 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AH2 ATo709H2(AH2 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AH3 ATo709H3(AH3 c){AH3 j=AH3(0.018*4.5,4.5,0.45);AH2 k=AH2(1.099,-0.099); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToGammaH1(AH1 c,AH1 rcpX){return pow(c,AH1_(rcpX));} - AH2 AToGammaH2(AH2 c,AH1 rcpX){return pow(c,AH2_(rcpX));} - AH3 AToGammaH3(AH3 c,AH1 rcpX){return pow(c,AH3_(rcpX));} 
-//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToSrgbH1(AH1 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055); - return clamp(j.x ,c*j.y ,pow(c,j.z )*k.x +k.y );} - AH2 AToSrgbH2(AH2 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055); - return clamp(j.xx ,c*j.yy ,pow(c,j.zz )*k.xx +k.yy );} - AH3 AToSrgbH3(AH3 c){AH3 j=AH3(0.0031308*12.92,12.92,1.0/2.4);AH2 k=AH2(1.055,-0.055); - return clamp(j.xxx,c*j.yyy,pow(c,j.zzz)*k.xxx+k.yyy);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToTwoH1(AH1 c){return sqrt(c);} - AH2 AToTwoH2(AH2 c){return sqrt(c);} - AH3 AToTwoH3(AH3 c){return sqrt(c);} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AToThreeF1(AH1 c){return pow(c,AH1_(1.0/3.0));} - AH2 AToThreeF2(AH2 c){return pow(c,AH2_(1.0/3.0));} - AH3 AToThreeF3(AH3 c){return pow(c,AH3_(1.0/3.0));} - #endif -//============================================================================================================================== - #ifdef A_HALF - AH1 AFrom709H1(AH1 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); - return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AH2 AFrom709H2(AH2 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); - return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AH3 AFrom709H3(AH3 c){AH3 j=AH3(0.081/4.5,1.0/4.5,1.0/0.45);AH2 k=AH2(1.0/1.099,0.099/1.099); - return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFromGammaH1(AH1 c,AH1 x){return pow(c,AH1_(x));} - AH2 AFromGammaH2(AH2 c,AH1 x){return 
pow(c,AH2_(x));} - AH3 AFromGammaH3(AH3 c,AH1 x){return pow(c,AH3_(x));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AHromSrgbF1(AH1 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); - return AZolSelH1(AZolSignedH1(c-j.x ),c*j.y ,pow(c*k.x +k.y ,j.z ));} - AH2 AHromSrgbF2(AH2 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); - return AZolSelH2(AZolSignedH2(c-j.xx ),c*j.yy ,pow(c*k.xx +k.yy ,j.zz ));} - AH3 AHromSrgbF3(AH3 c){AH3 j=AH3(0.04045/12.92,1.0/12.92,2.4);AH2 k=AH2(1.0/1.055,0.055/1.055); - return AZolSelH3(AZolSignedH3(c-j.xxx),c*j.yyy,pow(c*k.xxx+k.yyy,j.zzz));} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFromTwoH1(AH1 c){return c*c;} - AH2 AFromTwoH2(AH2 c){return c*c;} - AH3 AFromTwoH3(AH3 c){return c*c;} -//------------------------------------------------------------------------------------------------------------------------------ - AH1 AFromThreeH1(AH1 c){return c*c*c;} - AH2 AFromThreeH2(AH2 c){return c*c*c;} - AH3 AFromThreeH3(AH3 c){return c*c*c;} - #endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CS REMAP -//============================================================================================================================== - // Simple remap 64x1 to 8x8 with rotated 2x2 pixel quads in quad linear. - // 543210 - // ====== - // ..xxx. 
- // yy...y - AU2 ARmp8x8(AU1 a){return AU2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} -//============================================================================================================================== - // More complex remap 64x1 to 8x8 which is necessary for 2D wave reductions. - // 543210 - // ====== - // .xx..x - // y..yy. - // Details, - // LANE TO 8x8 MAPPING - // =================== - // 00 01 08 09 10 11 18 19 - // 02 03 0a 0b 12 13 1a 1b - // 04 05 0c 0d 14 15 1c 1d - // 06 07 0e 0f 16 17 1e 1f - // 20 21 28 29 30 31 38 39 - // 22 23 2a 2b 32 33 3a 3b - // 24 25 2c 2d 34 35 3c 3d - // 26 27 2e 2f 36 37 3e 3f - AU2 ARmpRed8x8(AU1 a){return AU2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} -//============================================================================================================================== - #ifdef A_HALF - AW2 ARmp8x8H(AU1 a){return AW2(ABfe(a,1u,3u),ABfiM(ABfe(a,3u,3u),a,1u));} - AW2 ARmpRed8x8H(AU1 a){return AW2(ABfiM(ABfe(a,2u,3u),a,1u),ABfiM(ABfe(a,3u,3u),ABfe(a,1u,2u),2u));} - #endif -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// REFERENCE -// 
-//------------------------------------------------------------------------------------------------------------------------------ -// IEEE FLOAT RULES -// ================ -// - saturate(NaN)=0, saturate(-INF)=0, saturate(+INF)=1 -// - {+/-}0 * {+/-}INF = NaN -// - -INF + (+INF) = NaN -// - {+/-}0 / {+/-}0 = NaN -// - {+/-}INF / {+/-}INF = NaN -// - a<(-0) := sqrt(a) = NaN (a=-0.0 won't NaN) -// - 0 == -0 -// - 4/0 = +INF -// - 4/-0 = -INF -// - 4+INF = +INF -// - 4-INF = -INF -// - 4*(+INF) = +INF -// - 4*(-INF) = -INF -// - -4*(+INF) = -INF -// - sqrt(+INF) = +INF -//------------------------------------------------------------------------------------------------------------------------------ -// FP16 ENCODING -// ============= -// fedcba9876543210 -// ---------------- -// ......mmmmmmmmmm 10-bit mantissa (encodes 11-bit 0.5 to 1.0 except for denormals) -// .eeeee.......... 5-bit exponent -// .00000.......... denormals -// .00001.......... -14 exponent -// .11110.......... 15 exponent -// .111110000000000 infinity -// .11111nnnnnnnnnn NaN with n!=0 -// s............... sign -//------------------------------------------------------------------------------------------------------------------------------ -// FP16/INT16 ALIASING DENORMAL -// ============================ -// 11-bit unsigned integers alias with half float denormal/normal values, -// 1 = 2^(-24) = 1/16777216 ....................... first denormal value -// 2 = 2^(-23) -// ... -// 1023 = 2^(-14)*(1-2^(-10)) = 2^(-14)*(1-1/1024) ... last denormal value -// 1024 = 2^(-14) = 1/16384 .......................... first normal value that still maps to integers -// 2047 .............................................. last normal value that still maps to integers -// Scaling limits, -// 2^15 = 32768 ...................................... 
largest power of 2 scaling -// Largest pow2 conversion mapping is at *32768, -// 1 : 2^(-9) = 1/512 -// 2 : 1/256 -// 4 : 1/128 -// 8 : 1/64 -// 16 : 1/32 -// 32 : 1/16 -// 64 : 1/8 -// 128 : 1/4 -// 256 : 1/2 -// 512 : 1 -// 1024 : 2 -// 2047 : a little less than 4 -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// GPU/CPU PORTABILITY -// -// -//------------------------------------------------------------------------------------------------------------------------------ -// This is the GPU implementation. -// See the CPU implementation for docs. 
-//============================================================================================================================== -#ifdef A_GPU - #define A_TRUE true - #define A_FALSE false - #define A_STATIC -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR ARGUMENT/RETURN/INITIALIZATION PORTABILITY -//============================================================================================================================== - #define retAD2 AD2 - #define retAD3 AD3 - #define retAD4 AD4 - #define retAF2 AF2 - #define retAF3 AF3 - #define retAF4 AF4 - #define retAL2 AL2 - #define retAL3 AL3 - #define retAL4 AL4 - #define retAU2 AU2 - #define retAU3 AU3 - #define retAU4 AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define inAD2 in AD2 - #define inAD3 in AD3 - #define inAD4 in AD4 - #define inAF2 in AF2 - #define inAF3 in AF3 - #define inAF4 in AF4 - #define inAL2 in AL2 - #define inAL3 in AL3 - #define inAL4 in AL4 - #define inAU2 in AU2 - #define inAU3 in AU3 - #define inAU4 in AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define inoutAD2 inout AD2 - #define inoutAD3 inout AD3 - #define inoutAD4 inout AD4 - #define inoutAF2 inout AF2 - #define inoutAF3 inout AF3 - #define inoutAF4 inout AF4 - #define inoutAL2 inout AL2 - #define inoutAL3 inout AL3 - #define inoutAL4 inout AL4 - #define inoutAU2 inout AU2 - 
#define inoutAU3 inout AU3 - #define inoutAU4 inout AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define outAD2 out AD2 - #define outAD3 out AD3 - #define outAD4 out AD4 - #define outAF2 out AF2 - #define outAF3 out AF3 - #define outAF4 out AF4 - #define outAL2 out AL2 - #define outAL3 out AL3 - #define outAL4 out AL4 - #define outAU2 out AU2 - #define outAU3 out AU3 - #define outAU4 out AU4 -//------------------------------------------------------------------------------------------------------------------------------ - #define varAD2(x) AD2 x - #define varAD3(x) AD3 x - #define varAD4(x) AD4 x - #define varAF2(x) AF2 x - #define varAF3(x) AF3 x - #define varAF4(x) AF4 x - #define varAL2(x) AL2 x - #define varAL3(x) AL3 x - #define varAL4(x) AL4 x - #define varAU2(x) AU2 x - #define varAU3(x) AU3 x - #define varAU4(x) AU4 x -//------------------------------------------------------------------------------------------------------------------------------ - #define initAD2(x,y) AD2(x,y) - #define initAD3(x,y,z) AD3(x,y,z) - #define initAD4(x,y,z,w) AD4(x,y,z,w) - #define initAF2(x,y) AF2(x,y) - #define initAF3(x,y,z) AF3(x,y,z) - #define initAF4(x,y,z,w) AF4(x,y,z,w) - #define initAL2(x,y) AL2(x,y) - #define initAL3(x,y,z) AL3(x,y,z) - #define initAL4(x,y,z,w) AL4(x,y,z,w) - #define initAU2(x,y) AU2(x,y) - #define initAU3(x,y,z) AU3(x,y,z) - #define initAU4(x,y,z,w) AU4(x,y,z,w) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ 
-//============================================================================================================================== -// SCALAR RETURN OPS -//============================================================================================================================== - #define AAbsD1(a) abs(AD1(a)) - #define AAbsF1(a) abs(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ACosD1(a) cos(AD1(a)) - #define ACosF1(a) cos(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ADotD2(a,b) dot(AD2(a),AD2(b)) - #define ADotD3(a,b) dot(AD3(a),AD3(b)) - #define ADotD4(a,b) dot(AD4(a),AD4(b)) - #define ADotF2(a,b) dot(AF2(a),AF2(b)) - #define ADotF3(a,b) dot(AF3(a),AF3(b)) - #define ADotF4(a,b) dot(AF4(a),AF4(b)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AExp2D1(a) exp2(AD1(a)) - #define AExp2F1(a) exp2(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AFloorD1(a) floor(AD1(a)) - #define AFloorF1(a) floor(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ALog2D1(a) log2(AD1(a)) - #define ALog2F1(a) log2(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define AMaxD1(a,b) max(a,b) - #define AMaxF1(a,b) max(a,b) - #define AMaxL1(a,b) max(a,b) - #define AMaxU1(a,b) max(a,b) -//------------------------------------------------------------------------------------------------------------------------------ - #define AMinD1(a,b) min(a,b) - #define AMinF1(a,b) min(a,b) - #define AMinL1(a,b) min(a,b) - 
#define AMinU1(a,b) min(a,b) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASinD1(a) sin(AD1(a)) - #define ASinF1(a) sin(AF1(a)) -//------------------------------------------------------------------------------------------------------------------------------ - #define ASqrtD1(a) sqrt(AD1(a)) - #define ASqrtF1(a) sqrt(AF1(a)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// SCALAR RETURN OPS - DEPENDENT -//============================================================================================================================== - #define APowD1(a,b) pow(AD1(a),AF1(b)) - #define APowF1(a,b) pow(AF1(a),AF1(b)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// VECTOR OPS -//------------------------------------------------------------------------------------------------------------------------------ -// These are added as needed for production or prototyping, so not necessarily a complete set. 
-// They follow a convention of taking in a destination and also returning the destination value to increase utility. -//============================================================================================================================== - #ifdef A_DUBL - AD2 opAAbsD2(outAD2 d,inAD2 a){d=abs(a);return d;} - AD3 opAAbsD3(outAD3 d,inAD3 a){d=abs(a);return d;} - AD4 opAAbsD4(outAD4 d,inAD4 a){d=abs(a);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAAddD2(outAD2 d,inAD2 a,inAD2 b){d=a+b;return d;} - AD3 opAAddD3(outAD3 d,inAD3 a,inAD3 b){d=a+b;return d;} - AD4 opAAddD4(outAD4 d,inAD4 a,inAD4 b){d=a+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAAddOneD2(outAD2 d,inAD2 a,AD1 b){d=a+AD2_(b);return d;} - AD3 opAAddOneD3(outAD3 d,inAD3 a,AD1 b){d=a+AD3_(b);return d;} - AD4 opAAddOneD4(outAD4 d,inAD4 a,AD1 b){d=a+AD4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opACpyD2(outAD2 d,inAD2 a){d=a;return d;} - AD3 opACpyD3(outAD3 d,inAD3 a){d=a;return d;} - AD4 opACpyD4(outAD4 d,inAD4 a){d=a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opALerpD2(outAD2 d,inAD2 a,inAD2 b,inAD2 c){d=ALerpD2(a,b,c);return d;} - AD3 opALerpD3(outAD3 d,inAD3 a,inAD3 b,inAD3 c){d=ALerpD3(a,b,c);return d;} - AD4 opALerpD4(outAD4 d,inAD4 a,inAD4 b,inAD4 c){d=ALerpD4(a,b,c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opALerpOneD2(outAD2 d,inAD2 a,inAD2 b,AD1 c){d=ALerpD2(a,b,AD2_(c));return d;} - AD3 opALerpOneD3(outAD3 d,inAD3 a,inAD3 b,AD1 c){d=ALerpD3(a,b,AD3_(c));return d;} - AD4 
opALerpOneD4(outAD4 d,inAD4 a,inAD4 b,AD1 c){d=ALerpD4(a,b,AD4_(c));return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMaxD2(outAD2 d,inAD2 a,inAD2 b){d=max(a,b);return d;} - AD3 opAMaxD3(outAD3 d,inAD3 a,inAD3 b){d=max(a,b);return d;} - AD4 opAMaxD4(outAD4 d,inAD4 a,inAD4 b){d=max(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMinD2(outAD2 d,inAD2 a,inAD2 b){d=min(a,b);return d;} - AD3 opAMinD3(outAD3 d,inAD3 a,inAD3 b){d=min(a,b);return d;} - AD4 opAMinD4(outAD4 d,inAD4 a,inAD4 b){d=min(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMulD2(outAD2 d,inAD2 a,inAD2 b){d=a*b;return d;} - AD3 opAMulD3(outAD3 d,inAD3 a,inAD3 b){d=a*b;return d;} - AD4 opAMulD4(outAD4 d,inAD4 a,inAD4 b){d=a*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opAMulOneD2(outAD2 d,inAD2 a,AD1 b){d=a*AD2_(b);return d;} - AD3 opAMulOneD3(outAD3 d,inAD3 a,AD1 b){d=a*AD3_(b);return d;} - AD4 opAMulOneD4(outAD4 d,inAD4 a,AD1 b){d=a*AD4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opANegD2(outAD2 d,inAD2 a){d=-a;return d;} - AD3 opANegD3(outAD3 d,inAD3 a){d=-a;return d;} - AD4 opANegD4(outAD4 d,inAD4 a){d=-a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AD2 opARcpD2(outAD2 d,inAD2 a){d=ARcpD2(a);return d;} - AD3 opARcpD3(outAD3 d,inAD3 a){d=ARcpD3(a);return d;} - AD4 opARcpD4(outAD4 d,inAD4 a){d=ARcpD4(a);return d;} - #endif 
-//============================================================================================================================== - AF2 opAAbsF2(outAF2 d,inAF2 a){d=abs(a);return d;} - AF3 opAAbsF3(outAF3 d,inAF3 a){d=abs(a);return d;} - AF4 opAAbsF4(outAF4 d,inAF4 a){d=abs(a);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAAddF2(outAF2 d,inAF2 a,inAF2 b){d=a+b;return d;} - AF3 opAAddF3(outAF3 d,inAF3 a,inAF3 b){d=a+b;return d;} - AF4 opAAddF4(outAF4 d,inAF4 a,inAF4 b){d=a+b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAAddOneF2(outAF2 d,inAF2 a,AF1 b){d=a+AF2_(b);return d;} - AF3 opAAddOneF3(outAF3 d,inAF3 a,AF1 b){d=a+AF3_(b);return d;} - AF4 opAAddOneF4(outAF4 d,inAF4 a,AF1 b){d=a+AF4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opACpyF2(outAF2 d,inAF2 a){d=a;return d;} - AF3 opACpyF3(outAF3 d,inAF3 a){d=a;return d;} - AF4 opACpyF4(outAF4 d,inAF4 a){d=a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opALerpF2(outAF2 d,inAF2 a,inAF2 b,inAF2 c){d=ALerpF2(a,b,c);return d;} - AF3 opALerpF3(outAF3 d,inAF3 a,inAF3 b,inAF3 c){d=ALerpF3(a,b,c);return d;} - AF4 opALerpF4(outAF4 d,inAF4 a,inAF4 b,inAF4 c){d=ALerpF4(a,b,c);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opALerpOneF2(outAF2 d,inAF2 a,inAF2 b,AF1 c){d=ALerpF2(a,b,AF2_(c));return d;} - AF3 opALerpOneF3(outAF3 d,inAF3 a,inAF3 b,AF1 c){d=ALerpF3(a,b,AF3_(c));return d;} - AF4 opALerpOneF4(outAF4 d,inAF4 a,inAF4 b,AF1 c){d=ALerpF4(a,b,AF4_(c));return d;} 
-//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMaxF2(outAF2 d,inAF2 a,inAF2 b){d=max(a,b);return d;} - AF3 opAMaxF3(outAF3 d,inAF3 a,inAF3 b){d=max(a,b);return d;} - AF4 opAMaxF4(outAF4 d,inAF4 a,inAF4 b){d=max(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMinF2(outAF2 d,inAF2 a,inAF2 b){d=min(a,b);return d;} - AF3 opAMinF3(outAF3 d,inAF3 a,inAF3 b){d=min(a,b);return d;} - AF4 opAMinF4(outAF4 d,inAF4 a,inAF4 b){d=min(a,b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMulF2(outAF2 d,inAF2 a,inAF2 b){d=a*b;return d;} - AF3 opAMulF3(outAF3 d,inAF3 a,inAF3 b){d=a*b;return d;} - AF4 opAMulF4(outAF4 d,inAF4 a,inAF4 b){d=a*b;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opAMulOneF2(outAF2 d,inAF2 a,AF1 b){d=a*AF2_(b);return d;} - AF3 opAMulOneF3(outAF3 d,inAF3 a,AF1 b){d=a*AF3_(b);return d;} - AF4 opAMulOneF4(outAF4 d,inAF4 a,AF1 b){d=a*AF4_(b);return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opANegF2(outAF2 d,inAF2 a){d=-a;return d;} - AF3 opANegF3(outAF3 d,inAF3 a){d=-a;return d;} - AF4 opANegF4(outAF4 d,inAF4 a){d=-a;return d;} -//------------------------------------------------------------------------------------------------------------------------------ - AF2 opARcpF2(outAF2 d,inAF2 a){d=ARcpF2(a);return d;} - AF3 opARcpF3(outAF3 d,inAF3 a){d=ARcpF3(a);return d;} - AF4 opARcpF4(outAF4 d,inAF4 a){d=ARcpF4(a);return d;} -#endif diff --git a/thirdparty/amd-fsr/ffx_fsr1.h b/thirdparty/amd-fsr/ffx_fsr1.h deleted file mode 100644 index 4e0b3d548553..000000000000 --- 
a/thirdparty/amd-fsr/ffx_fsr1.h +++ /dev/null @@ -1,1199 +0,0 @@ -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// -// AMD FidelityFX SUPER RESOLUTION [FSR 1] ::: SPATIAL SCALING & EXTRAS - v1.20210629 -// -// -//------------------------------------------------------------------------------------------------------------------------------ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//------------------------------------------------------------------------------------------------------------------------------ -// FidelityFX Super Resolution Sample -// -// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -//------------------------------------------------------------------------------------------------------------------------------ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//------------------------------------------------------------------------------------------------------------------------------ -// ABOUT -// ===== -// FSR is a collection of algorithms relating to generating a higher resolution image. -// This specific header focuses on single-image non-temporal image scaling, and related tools. -// -// The core functions are EASU and RCAS: -// [EASU] Edge Adaptive Spatial Upsampling ....... 1x to 4x area range spatial scaling, clamped adaptive elliptical filter. -// [RCAS] Robust Contrast Adaptive Sharpening .... A non-scaling variation on CAS. -// RCAS needs to be applied after EASU as a separate pass. -// -// Optional utility functions are: -// [LFGA] Linear Film Grain Applicator ........... Tool to apply film grain after scaling. -// [SRTM] Simple Reversible Tone-Mapper .......... Linear HDR {0 to FP16_MAX} to {0 to 1} and back. -// [TEPD] Temporal Energy Preserving Dither ...... Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. -// See each individual sub-section for inline documentation. 
-//------------------------------------------------------------------------------------------------------------------------------ -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//------------------------------------------------------------------------------------------------------------------------------ -// FUNCTION PERMUTATIONS -// ===================== -// *F() ..... Single item computation with 32-bit. -// *H() ..... Single item computation with 16-bit, with packing (aka two 16-bit ops in parallel) when possible. -// *Hx2() ... Processing two items in parallel with 16-bit, easier packing. -// Not all interfaces in this file have a *Hx2() form. -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [EASU] EDGE ADAPTIVE SPATIAL UPSAMPLING -// -//------------------------------------------------------------------------------------------------------------------------------ -// EASU provides a high 
quality spatial-only scaling at relatively low cost. -// Meaning EASU is appropiate for laptops and other low-end GPUs. -// Quality from 1x to 4x area scaling is good. -//------------------------------------------------------------------------------------------------------------------------------ -// The scalar uses a modified fast approximation to the standard lanczos(size=2) kernel. -// EASU runs in a single pass, so it applies a directionally and anisotropically adaptive radial lanczos. -// This is also kept as simple as possible to have minimum runtime. -//------------------------------------------------------------------------------------------------------------------------------ -// The lanzcos filter has negative lobes, so by itself it will introduce ringing. -// To remove all ringing, the algorithm uses the nearest 2x2 input texels as a neighborhood, -// and limits output to the minimum and maximum of that neighborhood. -//------------------------------------------------------------------------------------------------------------------------------ -// Input image requirements: -// -// Color needs to be encoded as 3 channel[red, green, blue](e.g.XYZ not supported) -// Each channel needs to be in the range[0, 1] -// Any color primaries are supported -// Display / tonemapping curve needs to be as if presenting to sRGB display or similar(e.g.Gamma 2.0) -// There should be no banding in the input -// There should be no high amplitude noise in the input -// There should be no noise in the input that is not at input pixel granularity -// For performance purposes, use 32bpp formats -//------------------------------------------------------------------------------------------------------------------------------ -// Best to apply EASU at the end of the frame after tonemapping -// but before film grain or composite of the UI. 
-//------------------------------------------------------------------------------------------------------------------------------ -// Example of including this header for D3D HLSL : -// -// #define A_GPU 1 -// #define A_HLSL 1 -// #define A_HALF 1 -// #include "ffx_a.h" -// #define FSR_EASU_H 1 -// #define FSR_RCAS_H 1 -// //declare input callbacks -// #include "ffx_fsr1.h" -// -// Example of including this header for Vulkan GLSL : -// -// #define A_GPU 1 -// #define A_GLSL 1 -// #define A_HALF 1 -// #include "ffx_a.h" -// #define FSR_EASU_H 1 -// #define FSR_RCAS_H 1 -// //declare input callbacks -// #include "ffx_fsr1.h" -// -// Example of including this header for Vulkan HLSL : -// -// #define A_GPU 1 -// #define A_HLSL 1 -// #define A_HLSL_6_2 1 -// #define A_NO_16_BIT_CAST 1 -// #define A_HALF 1 -// #include "ffx_a.h" -// #define FSR_EASU_H 1 -// #define FSR_RCAS_H 1 -// //declare input callbacks -// #include "ffx_fsr1.h" -// -// Example of declaring the required input callbacks for GLSL : -// The callbacks need to gather4 for each color channel using the specified texture coordinate 'p'. -// EASU uses gather4 to reduce position computation logic and for free Arrays of Structures to Structures of Arrays conversion. -// -// AH4 FsrEasuRH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,0));} -// AH4 FsrEasuGH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,1));} -// AH4 FsrEasuBH(AF2 p){return AH4(textureGather(sampler2D(tex,sam),p,2));} -// ... -// The FsrEasuCon function needs to be called from the CPU or GPU to set up constants. -// The difference in viewport and input image size is there to support Dynamic Resolution Scaling. -// To use FsrEasuCon() on the CPU, define A_CPU before including ffx_a and ffx_fsr1. -// Including a GPU example here, the 'con0' through 'con3' values would be stored out to a constant buffer. 
-// AU4 con0,con1,con2,con3; -// FsrEasuCon(con0,con1,con2,con3, -// 1920.0,1080.0, // Viewport size (top left aligned) in the input image which is to be scaled. -// 3840.0,2160.0, // The size of the input image. -// 2560.0,1440.0); // The output resolution. -//============================================================================================================================== -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CONSTANT SETUP -//============================================================================================================================== -// Call to setup required constant values (works on CPU or GPU). -A_STATIC void FsrEasuCon( -outAU4 con0, -outAU4 con1, -outAU4 con2, -outAU4 con3, -// This the rendered image resolution being upscaled -AF1 inputViewportInPixelsX, -AF1 inputViewportInPixelsY, -// This is the resolution of the resource containing the input image (useful for dynamic resolution) -AF1 inputSizeInPixelsX, -AF1 inputSizeInPixelsY, -// This is the display resolution which the input image gets upscaled to -AF1 outputSizeInPixelsX, -AF1 outputSizeInPixelsY){ - // Output integer position to a pixel position in viewport. 
- con0[0]=AU1_AF1(inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)); - con0[1]=AU1_AF1(inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)); - con0[2]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsX*ARcpF1(outputSizeInPixelsX)-AF1_(0.5)); - con0[3]=AU1_AF1(AF1_(0.5)*inputViewportInPixelsY*ARcpF1(outputSizeInPixelsY)-AF1_(0.5)); - // Viewport pixel position to normalized image space. - // This is used to get upper-left of 'F' tap. - con1[0]=AU1_AF1(ARcpF1(inputSizeInPixelsX)); - con1[1]=AU1_AF1(ARcpF1(inputSizeInPixelsY)); - // Centers of gather4, first offset from upper-left of 'F'. - // +---+---+ - // | | | - // +--(0)--+ - // | b | c | - // +---F---+---+---+ - // | e | f | g | h | - // +--(1)--+--(2)--+ - // | i | j | k | l | - // +---+---+---+---+ - // | n | o | - // +--(3)--+ - // | | | - // +---+---+ - con1[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); - con1[3]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsY)); - // These are from (0) instead of 'F'. - con2[0]=AU1_AF1(AF1_(-1.0)*ARcpF1(inputSizeInPixelsX)); - con2[1]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); - con2[2]=AU1_AF1(AF1_( 1.0)*ARcpF1(inputSizeInPixelsX)); - con2[3]=AU1_AF1(AF1_( 2.0)*ARcpF1(inputSizeInPixelsY)); - con3[0]=AU1_AF1(AF1_( 0.0)*ARcpF1(inputSizeInPixelsX)); - con3[1]=AU1_AF1(AF1_( 4.0)*ARcpF1(inputSizeInPixelsY)); - con3[2]=con3[3]=0;} - -//If the an offset into the input image resource -A_STATIC void FsrEasuConOffset( - outAU4 con0, - outAU4 con1, - outAU4 con2, - outAU4 con3, - // This the rendered image resolution being upscaled - AF1 inputViewportInPixelsX, - AF1 inputViewportInPixelsY, - // This is the resolution of the resource containing the input image (useful for dynamic resolution) - AF1 inputSizeInPixelsX, - AF1 inputSizeInPixelsY, - // This is the display resolution which the input image gets upscaled to - AF1 outputSizeInPixelsX, - AF1 outputSizeInPixelsY, - // This is the input image offset into the resource containing it (useful for dynamic resolution) - AF1 
inputOffsetInPixelsX, - AF1 inputOffsetInPixelsY) { - FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY, inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY); - con0[2] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsX * ARcpF1(outputSizeInPixelsX) - AF1_(0.5) + inputOffsetInPixelsX); - con0[3] = AU1_AF1(AF1_(0.5) * inputViewportInPixelsY * ARcpF1(outputSizeInPixelsY) - AF1_(0.5) + inputOffsetInPixelsY); -} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 32-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(FSR_EASU_F) - // Input callback prototypes, need to be implemented by calling shader - AF4 FsrEasuRF(AF2 p); - AF4 FsrEasuGF(AF2 p); - AF4 FsrEasuBF(AF2 p); -//------------------------------------------------------------------------------------------------------------------------------ - // Filtering for a given tap for the scalar. - void FsrEasuTapF( - inout AF3 aC, // Accumulated color, with negative lobe. - inout AF1 aW, // Accumulated weight. - AF2 off, // Pixel offset from resolve position to tap. - AF2 dir, // Gradient direction. - AF2 len, // Length. - AF1 lob, // Negative lobe strength. - AF1 clp, // Clipping point. - AF3 c){ // Tap color. - // Rotate offset by direction. - AF2 v; - v.x=(off.x*( dir.x))+(off.y*dir.y); - v.y=(off.x*(-dir.y))+(off.y*dir.x); - // Anisotropy. 
- v*=len; - // Compute distance^2. - AF1 d2=v.x*v.x+v.y*v.y; - // Limit to the window as at corner, 2 taps can easily be outside. - d2=min(d2,clp); - // Approximation of lancos2 without sin() or rcp(), or sqrt() to get x. - // (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2 - // |_______________________________________| |_______________| - // base window - // The general form of the 'base' is, - // (a*(b*x^2-1)^2-(a-1)) - // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe. - AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0); - AF1 wA=lob*d2+AF1_(-1.0); - wB*=wB; - wA*=wA; - wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0)); - AF1 w=wB*wA; - // Do weighted average. - aC+=c*w;aW+=w;} -//------------------------------------------------------------------------------------------------------------------------------ - // Accumulate direction and length. - void FsrEasuSetF( - inout AF2 dir, - inout AF1 len, - AF2 pp, - AP1 biS,AP1 biT,AP1 biU,AP1 biV, - AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE){ - // Compute bilinear weight, branches factor out as predicates are compiler time immediates. - // s t - // u v - AF1 w = AF1_(0.0); - if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y); - if(biT)w= pp.x *(AF1_(1.0)-pp.y); - if(biU)w=(AF1_(1.0)-pp.x)* pp.y ; - if(biV)w= pp.x * pp.y ; - // Direction is the '+' diff. - // a - // b c d - // e - // Then takes magnitude from abs average of both sides of 'c'. - // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms. - AF1 dc=lD-lC; - AF1 cb=lC-lB; - AF1 lenX=max(abs(dc),abs(cb)); - lenX=APrxLoRcpF1(lenX); - AF1 dirX=lD-lB; - dir.x+=dirX*w; - lenX=ASatF1(abs(dirX)*lenX); - lenX*=lenX; - len+=lenX*w; - // Repeat for the y axis. 
- AF1 ec=lE-lC; - AF1 ca=lC-lA; - AF1 lenY=max(abs(ec),abs(ca)); - lenY=APrxLoRcpF1(lenY); - AF1 dirY=lE-lA; - dir.y+=dirY*w; - lenY=ASatF1(abs(dirY)*lenY); - lenY*=lenY; - len+=lenY*w;} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrEasuF( - out AF3 pix, - AU2 ip, // Integer pixel position in output. - AU4 con0, // Constants generated by FsrEasuCon(). - AU4 con1, - AU4 con2, - AU4 con3){ -//------------------------------------------------------------------------------------------------------------------------------ - // Get position of 'f'. - AF2 pp=AF2(ip)*AF2_AU2(con0.xy)+AF2_AU2(con0.zw); - AF2 fp=floor(pp); - pp-=fp; -//------------------------------------------------------------------------------------------------------------------------------ - // 12-tap kernel. - // b c - // e f g h - // i j k l - // n o - // Gather 4 ordering. - // a b - // r g - // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions, - // a b <- unused (z) - // r g - // a b a b - // r g r g - // a b - // r g <- unused (z) - // Allowing dead-code removal to remove the 'z's. - AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw); - // These are from p0 to avoid pulling two constants on pre-Navi hardware. - AF2 p1=p0+AF2_AU2(con2.xy); - AF2 p2=p0+AF2_AU2(con2.zw); - AF2 p3=p0+AF2_AU2(con3.xy); - AF4 bczzR=FsrEasuRF(p0); - AF4 bczzG=FsrEasuGF(p0); - AF4 bczzB=FsrEasuBF(p0); - AF4 ijfeR=FsrEasuRF(p1); - AF4 ijfeG=FsrEasuGF(p1); - AF4 ijfeB=FsrEasuBF(p1); - AF4 klhgR=FsrEasuRF(p2); - AF4 klhgG=FsrEasuGF(p2); - AF4 klhgB=FsrEasuBF(p2); - AF4 zzonR=FsrEasuRF(p3); - AF4 zzonG=FsrEasuGF(p3); - AF4 zzonB=FsrEasuBF(p3); -//------------------------------------------------------------------------------------------------------------------------------ - // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD). 
- AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG); - AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG); - AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG); - AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG); - // Rename. - AF1 bL=bczzL.x; - AF1 cL=bczzL.y; - AF1 iL=ijfeL.x; - AF1 jL=ijfeL.y; - AF1 fL=ijfeL.z; - AF1 eL=ijfeL.w; - AF1 kL=klhgL.x; - AF1 lL=klhgL.y; - AF1 hL=klhgL.z; - AF1 gL=klhgL.w; - AF1 oL=zzonL.z; - AF1 nL=zzonL.w; - // Accumulate for bilinear interpolation. - AF2 dir=AF2_(0.0); - AF1 len=AF1_(0.0); - FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL); - FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL); - FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL); - FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL); -//------------------------------------------------------------------------------------------------------------------------------ - // Normalize with approximation, and cleanup close to zero. - AF2 dir2=dir*dir; - AF1 dirR=dir2.x+dir2.y; - AP1 zro=dirR w = -m/(n+e+w+s) -// 1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1) -// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount. -// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues. -// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps. -// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation. -// This stabilizes RCAS. -// RCAS does a simple highpass which is normalized against the local contrast then shaped, -// 0.25 -// 0.25 -1 0.25 -// 0.25 -// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges. 
-// -// GLSL example for the required callbacks : -// -// AH4 FsrRcasLoadH(ASW2 p){return AH4(imageLoad(imgSrc,ASU2(p)));} -// void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b) -// { -// //do any simple input color conversions here or leave empty if none needed -// } -// -// FsrRcasCon need to be called from the CPU or GPU to set up constants. -// Including a GPU example here, the 'con' value would be stored out to a constant buffer. -// -// AU4 con; -// FsrRcasCon(con, -// 0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. -// --------------- -// RCAS sharpening supports a CAS-like pass-through alpha via, -// #define FSR_RCAS_PASSTHROUGH_ALPHA 1 -// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise. -// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define, -// #define FSR_RCAS_DENOISE 1 -//============================================================================================================================== -// This is set at the limit of providing unnatural results for sharpening. -#define FSR_RCAS_LIMIT (0.25-(1.0/16.0)) -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// CONSTANT SETUP -//============================================================================================================================== -// Call to setup required constant values (works on CPU or GPU). 
-A_STATIC void FsrRcasCon( -outAU4 con, -// The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. -AF1 sharpness){ - // Transform from stops to linear value. - sharpness=AExp2F1(-sharpness); - varAF2(hSharp)=initAF2(sharpness,sharpness); - con[0]=AU1_AF1(sharpness); - con[1]=AU1_AH2_AF2(hSharp); - con[2]=0; - con[3]=0;} -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 32-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(FSR_RCAS_F) - // Input callback prototypes that need to be implemented by calling shader - AF4 FsrRcasLoadF(ASU2 p); - void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b); -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasF( - out AF1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. - out AF1 pixG, - out AF1 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out AF1 pixA, - #endif - AU2 ip, // Integer pixel position in output. - AU4 con){ // Constant generated by RcasSetup(). - // Algorithm uses minimal 3x3 pixel neighborhood. 
- // b - // d e f - // h - ASU2 sp=ASU2(ip); - AF3 b=FsrRcasLoadF(sp+ASU2( 0,-1)).rgb; - AF3 d=FsrRcasLoadF(sp+ASU2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AF4 ee=FsrRcasLoadF(sp); - AF3 e=ee.rgb;pixA=ee.a; - #else - AF3 e=FsrRcasLoadF(sp).rgb; - #endif - AF3 f=FsrRcasLoadF(sp+ASU2( 1, 0)).rgb; - AF3 h=FsrRcasLoadF(sp+ASU2( 0, 1)).rgb; - // Rename (32-bit) or regroup (16-bit). - AF1 bR=b.r; - AF1 bG=b.g; - AF1 bB=b.b; - AF1 dR=d.r; - AF1 dG=d.g; - AF1 dB=d.b; - AF1 eR=e.r; - AF1 eG=e.g; - AF1 eB=e.b; - AF1 fR=f.r; - AF1 fG=f.g; - AF1 fB=f.b; - AF1 hR=h.r; - AF1 hG=h.g; - AF1 hB=h.b; - // Run optional input transform. - FsrRcasInputF(bR,bG,bB); - FsrRcasInputF(dR,dG,dB); - FsrRcasInputF(eR,eG,eB); - FsrRcasInputF(fR,fG,fB); - FsrRcasInputF(hR,hG,hB); - // Luma times 2. - AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG); - AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG); - AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG); - AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG); - AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG); - // Noise detection. - AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL; - nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL))); - nz=AF1_(-0.5)*nz+AF1_(1.0); - // Min and max of ring. - AF1 mn4R=min(AMin3F1(bR,dR,fR),hR); - AF1 mn4G=min(AMin3F1(bG,dG,fG),hG); - AF1 mn4B=min(AMin3F1(bB,dB,fB),hB); - AF1 mx4R=max(AMax3F1(bR,dR,fR),hR); - AF1 mx4G=max(AMax3F1(bG,dG,fG),hG); - AF1 mx4B=max(AMax3F1(bB,dB,fB),hB); - // Immediate constants for peak range. - AF2 peakC=AF2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. 
- AF1 hitMinR=min(mn4R,eR)*ARcpF1(AF1_(4.0)*mx4R); - AF1 hitMinG=min(mn4G,eG)*ARcpF1(AF1_(4.0)*mx4G); - AF1 hitMinB=min(mn4B,eB)*ARcpF1(AF1_(4.0)*mx4B); - AF1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpF1(AF1_(4.0)*mn4R+peakC.y); - AF1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpF1(AF1_(4.0)*mn4G+peakC.y); - AF1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpF1(AF1_(4.0)*mn4B+peakC.y); - AF1 lobeR=max(-hitMinR,hitMaxR); - AF1 lobeG=max(-hitMinG,hitMaxG); - AF1 lobeB=max(-hitMinB,hitMaxB); - AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con.x); - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL; - return;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// NON-PACKED 16-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_H) - // Input callback prototypes that need to be implemented by calling shader - AH4 FsrRcasLoadH(ASW2 p); - void FsrRcasInputH(inout AH1 r,inout AH1 g,inout AH1 b); -//------------------------------------------------------------------------------------------------------------------------------ - void 
FsrRcasH( - out AH1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy. - out AH1 pixG, - out AH1 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out AH1 pixA, - #endif - AU2 ip, // Integer pixel position in output. - AU4 con){ // Constant generated by RcasSetup(). - // Sharpening algorithm uses minimal 3x3 pixel neighborhood. - // b - // d e f - // h - ASW2 sp=ASW2(ip); - AH3 b=FsrRcasLoadH(sp+ASW2( 0,-1)).rgb; - AH3 d=FsrRcasLoadH(sp+ASW2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AH4 ee=FsrRcasLoadH(sp); - AH3 e=ee.rgb;pixA=ee.a; - #else - AH3 e=FsrRcasLoadH(sp).rgb; - #endif - AH3 f=FsrRcasLoadH(sp+ASW2( 1, 0)).rgb; - AH3 h=FsrRcasLoadH(sp+ASW2( 0, 1)).rgb; - // Rename (32-bit) or regroup (16-bit). - AH1 bR=b.r; - AH1 bG=b.g; - AH1 bB=b.b; - AH1 dR=d.r; - AH1 dG=d.g; - AH1 dB=d.b; - AH1 eR=e.r; - AH1 eG=e.g; - AH1 eB=e.b; - AH1 fR=f.r; - AH1 fG=f.g; - AH1 fB=f.b; - AH1 hR=h.r; - AH1 hG=h.g; - AH1 hB=h.b; - // Run optional input transform. - FsrRcasInputH(bR,bG,bB); - FsrRcasInputH(dR,dG,dB); - FsrRcasInputH(eR,eG,eB); - FsrRcasInputH(fR,fG,fB); - FsrRcasInputH(hR,hG,hB); - // Luma times 2. - AH1 bL=bB*AH1_(0.5)+(bR*AH1_(0.5)+bG); - AH1 dL=dB*AH1_(0.5)+(dR*AH1_(0.5)+dG); - AH1 eL=eB*AH1_(0.5)+(eR*AH1_(0.5)+eG); - AH1 fL=fB*AH1_(0.5)+(fR*AH1_(0.5)+fG); - AH1 hL=hB*AH1_(0.5)+(hR*AH1_(0.5)+hG); - // Noise detection. - AH1 nz=AH1_(0.25)*bL+AH1_(0.25)*dL+AH1_(0.25)*fL+AH1_(0.25)*hL-eL; - nz=ASatH1(abs(nz)*APrxMedRcpH1(AMax3H1(AMax3H1(bL,dL,eL),fL,hL)-AMin3H1(AMin3H1(bL,dL,eL),fL,hL))); - nz=AH1_(-0.5)*nz+AH1_(1.0); - // Min and max of ring. - AH1 mn4R=min(AMin3H1(bR,dR,fR),hR); - AH1 mn4G=min(AMin3H1(bG,dG,fG),hG); - AH1 mn4B=min(AMin3H1(bB,dB,fB),hB); - AH1 mx4R=max(AMax3H1(bR,dR,fR),hR); - AH1 mx4G=max(AMax3H1(bG,dG,fG),hG); - AH1 mx4B=max(AMax3H1(bB,dB,fB),hB); - // Immediate constants for peak range. - AH2 peakC=AH2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. 
- AH1 hitMinR=min(mn4R,eR)*ARcpH1(AH1_(4.0)*mx4R); - AH1 hitMinG=min(mn4G,eG)*ARcpH1(AH1_(4.0)*mx4G); - AH1 hitMinB=min(mn4B,eB)*ARcpH1(AH1_(4.0)*mx4B); - AH1 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH1(AH1_(4.0)*mn4R+peakC.y); - AH1 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH1(AH1_(4.0)*mn4G+peakC.y); - AH1 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH1(AH1_(4.0)*mn4B+peakC.y); - AH1 lobeR=max(-hitMinR,hitMaxR); - AH1 lobeG=max(-hitMinG,hitMaxG); - AH1 lobeB=max(-hitMinB,hitMaxB); - AH1 lobe=max(AH1_(-FSR_RCAS_LIMIT),min(AMax3H1(lobeR,lobeG,lobeB),AH1_(0.0)))*AH2_AU1(con.y).x; - // Apply noise removal. - #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - AH1 rcpL=APrxMedRcpH1(AH1_(4.0)*lobe+AH1_(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// PACKED 16-BIT VERSION -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF)&&defined(FSR_RCAS_HX2) - // Input callback prototypes that need to be implemented by the calling shader - AH4 FsrRcasLoadHx2(ASW2 p); - void FsrRcasInputHx2(inout AH2 r,inout AH2 g,inout AH2 b); -//------------------------------------------------------------------------------------------------------------------------------ - // Can be 
used to convert from packed Structures of Arrays to Arrays of Structures for store. - void FsrRcasDepackHx2(out AH4 pix0,out AH4 pix1,AH2 pixR,AH2 pixG,AH2 pixB){ - #ifdef A_HLSL - // Invoke a slower path for DX only, since it won't allow uninitialized values. - pix0.a=pix1.a=0.0; - #endif - pix0.rgb=AH3(pixR.x,pixG.x,pixB.x); - pix1.rgb=AH3(pixR.y,pixG.y,pixB.y);} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrRcasHx2( - // Output values are for 2 8x8 tiles in a 16x8 region. - // pix.x = left 8x8 tile - // pix.y = right 8x8 tile - // This enables later processing to easily be packed as well. - out AH2 pixR, - out AH2 pixG, - out AH2 pixB, - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - out AH2 pixA, - #endif - AU2 ip, // Integer pixel position in output. - AU4 con){ // Constant generated by RcasSetup(). - // No scaling algorithm uses minimal 3x3 pixel neighborhood. - ASW2 sp0=ASW2(ip); - AH3 b0=FsrRcasLoadHx2(sp0+ASW2( 0,-1)).rgb; - AH3 d0=FsrRcasLoadHx2(sp0+ASW2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AH4 ee0=FsrRcasLoadHx2(sp0); - AH3 e0=ee0.rgb;pixA.r=ee0.a; - #else - AH3 e0=FsrRcasLoadHx2(sp0).rgb; - #endif - AH3 f0=FsrRcasLoadHx2(sp0+ASW2( 1, 0)).rgb; - AH3 h0=FsrRcasLoadHx2(sp0+ASW2( 0, 1)).rgb; - ASW2 sp1=sp0+ASW2(8,0); - AH3 b1=FsrRcasLoadHx2(sp1+ASW2( 0,-1)).rgb; - AH3 d1=FsrRcasLoadHx2(sp1+ASW2(-1, 0)).rgb; - #ifdef FSR_RCAS_PASSTHROUGH_ALPHA - AH4 ee1=FsrRcasLoadHx2(sp1); - AH3 e1=ee1.rgb;pixA.g=ee1.a; - #else - AH3 e1=FsrRcasLoadHx2(sp1).rgb; - #endif - AH3 f1=FsrRcasLoadHx2(sp1+ASW2( 1, 0)).rgb; - AH3 h1=FsrRcasLoadHx2(sp1+ASW2( 0, 1)).rgb; - // Arrays of Structures to Structures of Arrays conversion. 
- AH2 bR=AH2(b0.r,b1.r); - AH2 bG=AH2(b0.g,b1.g); - AH2 bB=AH2(b0.b,b1.b); - AH2 dR=AH2(d0.r,d1.r); - AH2 dG=AH2(d0.g,d1.g); - AH2 dB=AH2(d0.b,d1.b); - AH2 eR=AH2(e0.r,e1.r); - AH2 eG=AH2(e0.g,e1.g); - AH2 eB=AH2(e0.b,e1.b); - AH2 fR=AH2(f0.r,f1.r); - AH2 fG=AH2(f0.g,f1.g); - AH2 fB=AH2(f0.b,f1.b); - AH2 hR=AH2(h0.r,h1.r); - AH2 hG=AH2(h0.g,h1.g); - AH2 hB=AH2(h0.b,h1.b); - // Run optional input transform. - FsrRcasInputHx2(bR,bG,bB); - FsrRcasInputHx2(dR,dG,dB); - FsrRcasInputHx2(eR,eG,eB); - FsrRcasInputHx2(fR,fG,fB); - FsrRcasInputHx2(hR,hG,hB); - // Luma times 2. - AH2 bL=bB*AH2_(0.5)+(bR*AH2_(0.5)+bG); - AH2 dL=dB*AH2_(0.5)+(dR*AH2_(0.5)+dG); - AH2 eL=eB*AH2_(0.5)+(eR*AH2_(0.5)+eG); - AH2 fL=fB*AH2_(0.5)+(fR*AH2_(0.5)+fG); - AH2 hL=hB*AH2_(0.5)+(hR*AH2_(0.5)+hG); - // Noise detection. - AH2 nz=AH2_(0.25)*bL+AH2_(0.25)*dL+AH2_(0.25)*fL+AH2_(0.25)*hL-eL; - nz=ASatH2(abs(nz)*APrxMedRcpH2(AMax3H2(AMax3H2(bL,dL,eL),fL,hL)-AMin3H2(AMin3H2(bL,dL,eL),fL,hL))); - nz=AH2_(-0.5)*nz+AH2_(1.0); - // Min and max of ring. - AH2 mn4R=min(AMin3H2(bR,dR,fR),hR); - AH2 mn4G=min(AMin3H2(bG,dG,fG),hG); - AH2 mn4B=min(AMin3H2(bB,dB,fB),hB); - AH2 mx4R=max(AMax3H2(bR,dR,fR),hR); - AH2 mx4G=max(AMax3H2(bG,dG,fG),hG); - AH2 mx4B=max(AMax3H2(bB,dB,fB),hB); - // Immediate constants for peak range. - AH2 peakC=AH2(1.0,-1.0*4.0); - // Limiters, these need to be high precision RCPs. - AH2 hitMinR=min(mn4R,eR)*ARcpH2(AH2_(4.0)*mx4R); - AH2 hitMinG=min(mn4G,eG)*ARcpH2(AH2_(4.0)*mx4G); - AH2 hitMinB=min(mn4B,eB)*ARcpH2(AH2_(4.0)*mx4B); - AH2 hitMaxR=(peakC.x-max(mx4R,eR))*ARcpH2(AH2_(4.0)*mn4R+peakC.y); - AH2 hitMaxG=(peakC.x-max(mx4G,eG))*ARcpH2(AH2_(4.0)*mn4G+peakC.y); - AH2 hitMaxB=(peakC.x-max(mx4B,eB))*ARcpH2(AH2_(4.0)*mn4B+peakC.y); - AH2 lobeR=max(-hitMinR,hitMaxR); - AH2 lobeG=max(-hitMinG,hitMaxG); - AH2 lobeB=max(-hitMinB,hitMaxB); - AH2 lobe=max(AH2_(-FSR_RCAS_LIMIT),min(AMax3H2(lobeR,lobeG,lobeB),AH2_(0.0)))*AH2_(AH2_AU1(con.y).x); - // Apply noise removal. 
- #ifdef FSR_RCAS_DENOISE - lobe*=nz; - #endif - // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. - AH2 rcpL=APrxMedRcpH2(AH2_(4.0)*lobe+AH2_(1.0)); - pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL; - pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL; - pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR -// -//------------------------------------------------------------------------------------------------------------------------------ -// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts. -// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel. -// The 'Lfga*()' functions provide a convenient way to introduce grain. -// These functions limit grain based on distance to signal limits. -// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality. -// Grain application should be done in a linear colorspace. -// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased). 
-//------------------------------------------------------------------------------------------------------------------------------ -// Usage, -// FsrLfga*( -// color, // In/out linear colorspace color {0 to 1} ranged. -// grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain. -// amount); // Amount of grain (0 to 1} ranged. -//------------------------------------------------------------------------------------------------------------------------------ -// Example if grain texture is monochrome: 'FsrLfgaF(color,AF3_(grain),amount)' -//============================================================================================================================== -#if defined(A_GPU) - // Maximum grain is the minimum distance to the signal limit. - void FsrLfgaF(inout AF3 c,AF3 t,AF1 a){c+=(t*AF3_(a))*min(AF3_(1.0)-c,c);} -#endif -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF) - // Half precision version (slower). - void FsrLfgaH(inout AH3 c,AH3 t,AH1 a){c+=(t*AH3_(a))*min(AH3_(1.0)-c,c);} -//------------------------------------------------------------------------------------------------------------------------------ - // Packed half precision version (faster). 
- void FsrLfgaHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 tR,AH2 tG,AH2 tB,AH1 a){ - cR+=(tR*AH2_(a))*min(AH2_(1.0)-cR,cR);cG+=(tG*AH2_(a))*min(AH2_(1.0)-cG,cG);cB+=(tB*AH2_(a))*min(AH2_(1.0)-cB,cB);} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER -// -//------------------------------------------------------------------------------------------------------------------------------ -// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear. -// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering. -//------------------------------------------------------------------------------------------------------------------------------ -// Reversible tonemapper usage, -// FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}. -// FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}. 
-//============================================================================================================================== -#if defined(A_GPU) - void FsrSrtmF(inout AF3 c){c*=AF3_(ARcpF1(AMax3F1(c.r,c.g,c.b)+AF1_(1.0)));} - // The extra max solves the c=1.0 case (which is a /0). - void FsrSrtmInvF(inout AF3 c){c*=AF3_(ARcpF1(max(AF1_(1.0/32768.0),AF1_(1.0)-AMax3F1(c.r,c.g,c.b))));} -#endif -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF) - void FsrSrtmH(inout AH3 c){c*=AH3_(ARcpH1(AMax3H1(c.r,c.g,c.b)+AH1_(1.0)));} - void FsrSrtmInvH(inout AH3 c){c*=AH3_(ARcpH1(max(AH1_(1.0/32768.0),AH1_(1.0)-AMax3H1(c.r,c.g,c.b))));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrSrtmHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ - AH2 rcp=ARcpH2(AMax3H2(cR,cG,cB)+AH2_(1.0));cR*=rcp;cG*=rcp;cB*=rcp;} - void FsrSrtmInvHx2(inout AH2 cR,inout AH2 cG,inout AH2 cB){ - AH2 rcp=ARcpH2(max(AH2_(1.0/32768.0),AH2_(1.0)-AMax3H2(cR,cG,cB)));cR*=rcp;cG*=rcp;cB*=rcp;} -#endif -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -//_____________________________________________________________/\_______________________________________________________________ -//============================================================================================================================== -// -// FSR - [TEPD] TEMPORAL ENERGY 
PRESERVING DITHER -// -//------------------------------------------------------------------------------------------------------------------------------ -// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion. -// Gamma 2.0 is used so that the conversion back to linear is just to square the color. -// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively. -// Given good non-biased temporal blue noise as dither input, -// the output dither will temporally conserve energy. -// This is done by choosing the linear nearest step point instead of perceptual nearest. -// See code below for details. -//------------------------------------------------------------------------------------------------------------------------------ -// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION -// =============================================== -// - Output is 'uint(floor(saturate(n)*255.0+0.5))'. -// - Thus rounding is to nearest. -// - NaN gets converted to zero. -// - INF is clamped to {0.0 to 1.0}. -//============================================================================================================================== -#if defined(A_GPU) - // Hand tuned integer position to dither value, with more values than simple checkerboard. - // Only 32-bit has enough precision for this compddation. - // Output is {0 to <1}. - AF1 FsrTepdDitF(AU2 p,AU1 f){ - AF1 x=AF1_(p.x+f); - AF1 y=AF1_(p.y); - // The 1.61803 golden ratio. - AF1 a=AF1_((1.0+sqrt(5.0))/2.0); - // Number designed to provide a good visual pattern. - AF1 b=AF1_(1.0/3.69); - x=x*a+(y*b); - return AFractF1(x);} -//------------------------------------------------------------------------------------------------------------------------------ - // This version is 8-bit gamma 2.0. - // The 'c' input is {0 to 1}. - // Output is {0 to 1} ready for image store. 
- void FsrTepdC8F(inout AF3 c,AF1 dit){ - AF3 n=sqrt(c); - n=floor(n*AF3_(255.0))*AF3_(1.0/255.0); - AF3 a=n*n; - AF3 b=n+AF3_(1.0/255.0);b=b*b; - // Ratio of 'a' to 'b' required to produce 'c'. - // APrxLoRcpF1() won't work here (at least for very high dynamic ranges). - // APrxMedRcpF1() is an IADD,FMA,MUL. - AF3 r=(c-b)*APrxMedRcpF3(a-b); - // Use the ratio as a cutoff to choose 'a' or 'b'. - // AGtZeroF1() is a MUL. - c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/255.0));} -//------------------------------------------------------------------------------------------------------------------------------ - // This version is 10-bit gamma 2.0. - // The 'c' input is {0 to 1}. - // Output is {0 to 1} ready for image store. - void FsrTepdC10F(inout AF3 c,AF1 dit){ - AF3 n=sqrt(c); - n=floor(n*AF3_(1023.0))*AF3_(1.0/1023.0); - AF3 a=n*n; - AF3 b=n+AF3_(1.0/1023.0);b=b*b; - AF3 r=(c-b)*APrxMedRcpF3(a-b); - c=ASatF3(n+AGtZeroF3(AF3_(dit)-r)*AF3_(1.0/1023.0));} -#endif -//============================================================================================================================== -#if defined(A_GPU)&&defined(A_HALF) - AH1 FsrTepdDitH(AU2 p,AU1 f){ - AF1 x=AF1_(p.x+f); - AF1 y=AF1_(p.y); - AF1 a=AF1_((1.0+sqrt(5.0))/2.0); - AF1 b=AF1_(1.0/3.69); - x=x*a+(y*b); - return AH1(AFractF1(x));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC8H(inout AH3 c,AH1 dit){ - AH3 n=sqrt(c); - n=floor(n*AH3_(255.0))*AH3_(1.0/255.0); - AH3 a=n*n; - AH3 b=n+AH3_(1.0/255.0);b=b*b; - AH3 r=(c-b)*APrxMedRcpH3(a-b); - c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/255.0));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC10H(inout AH3 c,AH1 dit){ - AH3 n=sqrt(c); - n=floor(n*AH3_(1023.0))*AH3_(1.0/1023.0); - AH3 a=n*n; - AH3 b=n+AH3_(1.0/1023.0);b=b*b; - AH3 r=(c-b)*APrxMedRcpH3(a-b); - 
c=ASatH3(n+AGtZeroH3(AH3_(dit)-r)*AH3_(1.0/1023.0));} -//============================================================================================================================== - // This computes dither for positions 'p' and 'p+{8,0}'. - AH2 FsrTepdDitHx2(AU2 p,AU1 f){ - AF2 x; - x.x=AF1_(p.x+f); - x.y=x.x+AF1_(8.0); - AF1 y=AF1_(p.y); - AF1 a=AF1_((1.0+sqrt(5.0))/2.0); - AF1 b=AF1_(1.0/3.69); - x=x*AF2_(a)+AF2_(y*b); - return AH2(AFractF2(x));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC8Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ - AH2 nR=sqrt(cR); - AH2 nG=sqrt(cG); - AH2 nB=sqrt(cB); - nR=floor(nR*AH2_(255.0))*AH2_(1.0/255.0); - nG=floor(nG*AH2_(255.0))*AH2_(1.0/255.0); - nB=floor(nB*AH2_(255.0))*AH2_(1.0/255.0); - AH2 aR=nR*nR; - AH2 aG=nG*nG; - AH2 aB=nB*nB; - AH2 bR=nR+AH2_(1.0/255.0);bR=bR*bR; - AH2 bG=nG+AH2_(1.0/255.0);bG=bG*bG; - AH2 bB=nB+AH2_(1.0/255.0);bB=bB*bB; - AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); - AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); - AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); - cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/255.0)); - cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/255.0)); - cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/255.0));} -//------------------------------------------------------------------------------------------------------------------------------ - void FsrTepdC10Hx2(inout AH2 cR,inout AH2 cG,inout AH2 cB,AH2 dit){ - AH2 nR=sqrt(cR); - AH2 nG=sqrt(cG); - AH2 nB=sqrt(cB); - nR=floor(nR*AH2_(1023.0))*AH2_(1.0/1023.0); - nG=floor(nG*AH2_(1023.0))*AH2_(1.0/1023.0); - nB=floor(nB*AH2_(1023.0))*AH2_(1.0/1023.0); - AH2 aR=nR*nR; - AH2 aG=nG*nG; - AH2 aB=nB*nB; - AH2 bR=nR+AH2_(1.0/1023.0);bR=bR*bR; - AH2 bG=nG+AH2_(1.0/1023.0);bG=bG*bG; - AH2 bB=nB+AH2_(1.0/1023.0);bB=bB*bB; - AH2 rR=(cR-bR)*APrxMedRcpH2(aR-bR); - AH2 rG=(cG-bG)*APrxMedRcpH2(aG-bG); - AH2 rB=(cB-bB)*APrxMedRcpH2(aB-bB); - 
cR=ASatH2(nR+AGtZeroH2(dit-rR)*AH2_(1.0/1023.0)); - cG=ASatH2(nG+AGtZeroH2(dit-rG)*AH2_(1.0/1023.0)); - cB=ASatH2(nB+AGtZeroH2(dit-rB)*AH2_(1.0/1023.0));} -#endif diff --git a/thirdparty/amd-fsr2/LICENSE.txt b/thirdparty/amd-fsr2/LICENSE.txt deleted file mode 100644 index c066ae106362..000000000000 --- a/thirdparty/amd-fsr2/LICENSE.txt +++ /dev/null @@ -1,21 +0,0 @@ -FidelityFX Super Resolution 2.2 -================================= -Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/thirdparty/amd-fsr2/ffx_error.h b/thirdparty/amd-fsr2/ffx_error.h deleted file mode 100644 index 7ba7d9c4ea47..000000000000 --- a/thirdparty/amd-fsr2/ffx_error.h +++ /dev/null @@ -1,59 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma once - -#include "ffx_types.h" - -/// Typedef for error codes returned from functions in the FidelityFX SDK. -typedef int32_t FfxErrorCode; - -static const FfxErrorCode FFX_OK = 0; ///< The operation completed successfully. -static const FfxErrorCode FFX_ERROR_INVALID_POINTER = 0x80000000; ///< The operation failed due to an invalid pointer. -static const FfxErrorCode FFX_ERROR_INVALID_ALIGNMENT = 0x80000001; ///< The operation failed due to an invalid alignment. -static const FfxErrorCode FFX_ERROR_INVALID_SIZE = 0x80000002; ///< The operation failed due to an invalid size. -static const FfxErrorCode FFX_EOF = 0x80000003; ///< The end of the file was encountered. -static const FfxErrorCode FFX_ERROR_INVALID_PATH = 0x80000004; ///< The operation failed because the specified path was invalid. 
-static const FfxErrorCode FFX_ERROR_EOF = 0x80000005; ///< The operation failed because end of file was reached. -static const FfxErrorCode FFX_ERROR_MALFORMED_DATA = 0x80000006; ///< The operation failed because of some malformed data. -static const FfxErrorCode FFX_ERROR_OUT_OF_MEMORY = 0x80000007; ///< The operation failed because it ran out memory. -static const FfxErrorCode FFX_ERROR_INCOMPLETE_INTERFACE = 0x80000008; ///< The operation failed because the interface was not fully configured. -static const FfxErrorCode FFX_ERROR_INVALID_ENUM = 0x80000009; ///< The operation failed because of an invalid enumeration value. -static const FfxErrorCode FFX_ERROR_INVALID_ARGUMENT = 0x8000000a; ///< The operation failed because an argument was invalid. -static const FfxErrorCode FFX_ERROR_OUT_OF_RANGE = 0x8000000b; ///< The operation failed because a value was out of range. -static const FfxErrorCode FFX_ERROR_NULL_DEVICE = 0x8000000c; ///< The operation failed because a device was null. -static const FfxErrorCode FFX_ERROR_BACKEND_API_ERROR = 0x8000000d; ///< The operation failed because the backend API returned an error code. -static const FfxErrorCode FFX_ERROR_INSUFFICIENT_MEMORY = 0x8000000e; ///< The operation failed because there was not enough memory. - -/// Helper macro to return error code y from a function when a specific condition, x, is not met. -#define FFX_RETURN_ON_ERROR(x, y) \ - if (!(x)) \ - { \ - return (y); \ - } - -/// Helper macro to return error code x from a function when it is not FFX_OK. -#define FFX_VALIDATE(x) \ - { \ - FfxErrorCode ret = x; \ - FFX_RETURN_ON_ERROR(ret == FFX_OK, ret); \ - } - diff --git a/thirdparty/amd-fsr2/ffx_fsr2_interface.h b/thirdparty/amd-fsr2/ffx_fsr2_interface.h deleted file mode 100644 index b6be9760a719..000000000000 --- a/thirdparty/amd-fsr2/ffx_fsr2_interface.h +++ /dev/null @@ -1,395 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. 
All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma once - -#include "ffx_assert.h" -#include "ffx_types.h" -#include "ffx_error.h" - -// Include the FSR2 resources defined in the HLSL code. This shared here to avoid getting out of sync. -#define FFX_CPU -#include "shaders/ffx_fsr2_resources.h" -#include "shaders/ffx_fsr2_common.h" - -#if defined(__cplusplus) -extern "C" { -#endif // #if defined(__cplusplus) - -FFX_FORWARD_DECLARE(FfxFsr2Interface); - -/// An enumeration of all the passes which constitute the FSR2 algorithm. -/// -/// FSR2 is implemented as a composite of several compute passes each -/// computing a key part of the final result. Each call to the -/// FfxFsr2ScheduleGpuJobFunc callback function will -/// correspond to a single pass included in FfxFsr2Pass. For a -/// more comprehensive description of each pass, please refer to the FSR2 -/// reference documentation. 
-/// -/// Please note in some cases e.g.: FFX_FSR2_PASS_ACCUMULATE -/// and FFX_FSR2_PASS_ACCUMULATE_SHARPEN either one pass or the -/// other will be used (they are mutually exclusive). The choice of which will -/// depend on the way the FfxFsr2Context is created and the -/// precise contents of FfxFsr2DispatchParamters each time a call -/// is made to ffxFsr2ContextDispatch. -/// -/// @ingroup FSR2 -typedef enum FfxFsr2Pass { - - FFX_FSR2_PASS_DEPTH_CLIP = 0, ///< A pass which performs depth clipping. - FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1, ///< A pass which performs reconstruction of previous frame's depth. - FFX_FSR2_PASS_LOCK = 2, ///< A pass which calculates pixel locks. - FFX_FSR2_PASS_ACCUMULATE = 3, ///< A pass which performs upscaling. - FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4, ///< A pass which performs upscaling when sharpening is used. - FFX_FSR2_PASS_RCAS = 5, ///< A pass which performs sharpening. - FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6, ///< A pass which generates the luminance mipmap chain for the current frame. - FFX_FSR2_PASS_GENERATE_REACTIVE = 7, ///< An optional pass to generate a reactive mask - FFX_FSR2_PASS_TCR_AUTOGENERATE = 8, ///< An optional pass to generate a texture-and-composition and reactive masks - - FFX_FSR2_PASS_COUNT ///< The number of passes performed by FSR2. -} FfxFsr2Pass; - -typedef enum FfxFsr2MsgType { - FFX_FSR2_MESSAGE_TYPE_ERROR = 0, - FFX_FSR2_MESSAGE_TYPE_WARNING = 1, - FFX_FSR2_MESSAGE_TYPE_COUNT -} FfxFsr2MsgType; - -/// Create and initialize the backend context. -/// -/// The callback function sets up the backend context for rendering. -/// It will create or reference the device and create required internal data structures. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] device The FfxDevice obtained by ffxGetDevice(DX12/VK/...). -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. 
-/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2CreateBackendContextFunc)( - FfxFsr2Interface* backendInterface, - FfxDevice device); - -/// Get a list of capabilities of the device. -/// -/// When creating an FfxFsr2Context it is desirable for the FSR2 -/// core implementation to be aware of certain characteristics of the platform -/// that is being targetted. This is because some optimizations which FSR2 -/// attempts to perform are more effective on certain classes of hardware than -/// others, or are not supported by older hardware. In order to avoid cases -/// where optimizations actually have the effect of decreasing performance, or -/// reduce the breadth of support provided by FSR2, FSR2 queries the -/// capabilities of the device to make such decisions. -/// -/// For target platforms with fixed hardware support you need not implement -/// this callback function by querying the device, but instead may hardcore -/// what features are available on the platform. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [out] outDeviceCapabilities The device capabilities structure to fill out. -/// @param [in] device The device to query for capabilities. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode(*FfxFsr2GetDeviceCapabilitiesFunc)( - FfxFsr2Interface* backendInterface, - FfxDeviceCapabilities* outDeviceCapabilities, - FfxDevice device); - -/// Destroy the backend context and dereference the device. -/// -/// This function is called when the FfxFsr2Context is destroyed. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. 
-/// -/// @ingroup FSR2 -typedef FfxErrorCode(*FfxFsr2DestroyBackendContextFunc)( - FfxFsr2Interface* backendInterface); - -/// Create a resource. -/// -/// This callback is intended for the backend to create internal resources. -/// -/// Please note: It is also possible that the creation of resources might -/// itself cause additional resources to be created by simply calling the -/// FfxFsr2CreateResourceFunc function pointer again. This is -/// useful when handling the initial creation of resources which must be -/// initialized. The flow in such a case would be an initial call to create the -/// CPU-side resource, another to create the GPU-side resource, and then a call -/// to schedule a copy render job to move the data between the two. Typically -/// this type of function call flow is only seen during the creation of an -/// FfxFsr2Context. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] createResourceDescription A pointer to a FfxCreateResourceDescription. -/// @param [out] outResource A pointer to a FfxResource object. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2CreateResourceFunc)( - FfxFsr2Interface* backendInterface, - const FfxCreateResourceDescription* createResourceDescription, - FfxResourceInternal* outResource); - -/// Register a resource in the backend for the current frame. -/// -/// Since FSR2 and the backend are not aware how many different -/// resources will get passed to FSR2 over time, it's not safe -/// to register all resources simultaneously in the backend. -/// Also passed resources may not be valid after the dispatch call. -/// As a result it's safest to register them as FfxResourceInternal -/// and clear them at the end of the dispatch call. -/// -/// @param [in] backendInterface A pointer to the backend interface. 
-/// @param [in] inResource A pointer to a FfxResource. -/// @param [out] outResource A pointer to a FfxResourceInternal object. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode(*FfxFsr2RegisterResourceFunc)( - FfxFsr2Interface* backendInterface, - const FfxResource* inResource, - FfxResourceInternal* outResource); - -/// Unregister all temporary FfxResourceInternal from the backend. -/// -/// Unregister FfxResourceInternal referencing resources passed to -/// a function as a parameter. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode(*FfxFsr2UnregisterResourcesFunc)( - FfxFsr2Interface* backendInterface); - -/// Retrieve a FfxResourceDescription matching a -/// FfxResource structure. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] resource A pointer to a FfxResource object. -/// -/// @returns -/// A description of the resource. -/// -/// @ingroup FSR2 -typedef FfxResourceDescription (*FfxFsr2GetResourceDescriptionFunc)( - FfxFsr2Interface* backendInterface, - FfxResourceInternal resource); - -/// Destroy a resource -/// -/// This callback is intended for the backend to release an internal resource. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] resource A pointer to a FfxResource object. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2DestroyResourceFunc)( - FfxFsr2Interface* backendInterface, - FfxResourceInternal resource); - -/// Create a render pipeline. 
-/// -/// A rendering pipeline contains the shader as well as resource bindpoints -/// and samplers. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] pass The identifier for the pass. -/// @param [in] pipelineDescription A pointer to a FfxPipelineDescription describing the pipeline to be created. -/// @param [out] outPipeline A pointer to a FfxPipelineState structure which should be populated. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2CreatePipelineFunc)( - FfxFsr2Interface* backendInterface, - FfxFsr2Pass pass, - const FfxPipelineDescription* pipelineDescription, - FfxPipelineState* outPipeline); - -/// Destroy a render pipeline. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [out] pipeline A pointer to a FfxPipelineState structure which should be released. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2DestroyPipelineFunc)( - FfxFsr2Interface* backendInterface, - FfxPipelineState* pipeline); - -/// Schedule a render job to be executed on the next call of -/// FfxFsr2ExecuteGpuJobsFunc. -/// -/// Render jobs can perform one of three different tasks: clear, copy or -/// compute dispatches. -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] job A pointer to a FfxGpuJobDescription structure. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2ScheduleGpuJobFunc)( - FfxFsr2Interface* backendInterface, - const FfxGpuJobDescription* job); - -/// Execute scheduled render jobs on the comandList provided. 
-/// -/// The recording of the graphics API commands should take place in this -/// callback function, the render jobs which were previously enqueued (via -/// callbacks made to FfxFsr2ScheduleGpuJobFunc) should be -/// processed in the order they were received. Advanced users might choose to -/// reorder the rendering jobs, but should do so with care to respect the -/// resource dependencies. -/// -/// Depending on the precise contents of FfxFsr2DispatchDescription a -/// different number of render jobs might have previously been enqueued (for -/// example if sharpening is toggled on and off). -/// -/// @param [in] backendInterface A pointer to the backend interface. -/// @param [in] commandList A pointer to a FfxCommandList structure. -/// -/// @retval -/// FFX_OK The operation completed successfully. -/// @retval -/// Anything else The operation failed. -/// -/// @ingroup FSR2 -typedef FfxErrorCode (*FfxFsr2ExecuteGpuJobsFunc)( - FfxFsr2Interface* backendInterface, - FfxCommandList commandList); - -/// Pass a string message -/// -/// Used for debug messages. -/// -/// @param [in] type The type of message. -/// @param [in] message A string message to pass. -/// -/// -/// @ingroup FSR2 -typedef void(*FfxFsr2Message)( - FfxFsr2MsgType type, - const wchar_t* message); - -/// A structure encapsulating the interface between the core implentation of -/// the FSR2 algorithm and any graphics API that it should ultimately call. -/// -/// This set of functions serves as an abstraction layer between FSR2 and the -/// API used to implement it. While FSR2 ships with backends for DirectX12 and -/// Vulkan, it is possible to implement your own backend for other platforms or -/// which sits ontop of your engine's own abstraction layer. 
For details on the -/// expectations of what each function should do you should refer the -/// description of the following function pointer types: -/// -/// FfxFsr2CreateDeviceFunc -/// FfxFsr2GetDeviceCapabilitiesFunc -/// FfxFsr2DestroyDeviceFunc -/// FfxFsr2CreateResourceFunc -/// FfxFsr2GetResourceDescriptionFunc -/// FfxFsr2DestroyResourceFunc -/// FfxFsr2CreatePipelineFunc -/// FfxFsr2DestroyPipelineFunc -/// FfxFsr2ScheduleGpuJobFunc -/// FfxFsr2ExecuteGpuJobsFunc -/// -/// Depending on the graphics API that is abstracted by the backend, it may be -/// required that the backend is to some extent stateful. To ensure that -/// applications retain full control to manage the memory used by FSR2, the -/// scratchBuffer and scratchBufferSize fields are -/// provided. A backend should provide a means of specifying how much scratch -/// memory is required for its internal implementation (e.g: via a function -/// or constant value). The application is that responsible for allocating that -/// memory and providing it when setting up the FSR2 backend. Backends provided -/// with FSR2 do not perform dynamic memory allocations, and instead -/// suballocate all memory from the scratch buffers provided. -/// -/// The scratchBuffer and scratchBufferSize fields -/// should be populated according to the requirements of each backend. For -/// example, if using the DirectX 12 backend you should call the -/// ffxFsr2GetScratchMemorySizeDX12 function. It is not required -/// that custom backend implementations use a scratch buffer. -/// -/// @ingroup FSR2 -typedef struct FfxFsr2Interface { - - FfxFsr2CreateBackendContextFunc fpCreateBackendContext; ///< A callback function to create and initialize the backend context. - FfxFsr2GetDeviceCapabilitiesFunc fpGetDeviceCapabilities; ///< A callback function to query device capabilites. - FfxFsr2DestroyBackendContextFunc fpDestroyBackendContext; ///< A callback function to destroy the backendcontext. This also dereferences the device. 
- FfxFsr2CreateResourceFunc fpCreateResource; ///< A callback function to create a resource. - FfxFsr2RegisterResourceFunc fpRegisterResource; ///< A callback function to register an external resource. - FfxFsr2UnregisterResourcesFunc fpUnregisterResources; ///< A callback function to unregister external resource. - FfxFsr2GetResourceDescriptionFunc fpGetResourceDescription; ///< A callback function to retrieve a resource description. - FfxFsr2DestroyResourceFunc fpDestroyResource; ///< A callback function to destroy a resource. - FfxFsr2CreatePipelineFunc fpCreatePipeline; ///< A callback function to create a render or compute pipeline. - FfxFsr2DestroyPipelineFunc fpDestroyPipeline; ///< A callback function to destroy a render or compute pipeline. - FfxFsr2ScheduleGpuJobFunc fpScheduleGpuJob; ///< A callback function to schedule a render job. - FfxFsr2ExecuteGpuJobsFunc fpExecuteGpuJobs; ///< A callback function to execute all queued render jobs. - - void* scratchBuffer; ///< A preallocated buffer for memory utilized internally by the backend. - size_t scratchBufferSize; ///< Size of the buffer pointed to by scratchBuffer. -} FfxFsr2Interface; - -#if defined(__cplusplus) -} -#endif // #if defined(__cplusplus) diff --git a/thirdparty/amd-fsr2/ffx_types.h b/thirdparty/amd-fsr2/ffx_types.h deleted file mode 100644 index f71b259cce9b..000000000000 --- a/thirdparty/amd-fsr2/ffx_types.h +++ /dev/null @@ -1,365 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. 
-// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma once - -#include -#include - -#if defined (FFX_GCC) -/// FidelityFX exported functions -#define FFX_API -#else -/// FidelityFX exported functions -#define FFX_API __declspec(dllexport) -#endif // #if defined (FFX_GCC) - -/// Maximum supported number of simultaneously bound SRVs. -#define FFX_MAX_NUM_SRVS 16 - -/// Maximum supported number of simultaneously bound UAVs. -#define FFX_MAX_NUM_UAVS 8 - -/// Maximum number of constant buffers bound. -#define FFX_MAX_NUM_CONST_BUFFERS 2 - -/// Maximum size of bound constant buffers. 
-#define FFX_MAX_CONST_SIZE 64 - -/// Off by default warnings -#if defined(_MSC_VER) -#pragma warning(disable : 4365 4710 4820 5039) -#elif defined(__clang__) -#pragma clang diagnostic ignored "-Wunused-parameter" -#pragma clang diagnostic ignored "-Wmissing-field-initializers" -#pragma clang diagnostic ignored "-Wsign-compare" -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wignored-qualifiers" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#ifdef __cplusplus -extern "C" { -#endif // #ifdef __cplusplus - -/// An enumeration of surface formats. -typedef enum FfxSurfaceFormat { - - FFX_SURFACE_FORMAT_UNKNOWN, ///< Unknown format - FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS, ///< 32 bit per channel, 4 channel typeless format - FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT, ///< 32 bit per channel, 4 channel float format - FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, ///< 16 bit per channel, 4 channel float format - FFX_SURFACE_FORMAT_R16G16B16A16_UNORM, ///< 16 bit per channel, 4 channel unsigned normalized format - FFX_SURFACE_FORMAT_R32G32_FLOAT, ///< 32 bit per channel, 2 channel float format - FFX_SURFACE_FORMAT_R32_UINT, ///< 32 bit per channel, 1 channel float format - FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS, ///< 8 bit per channel, 4 channel float format - FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, ///< 8 bit per channel, 4 channel unsigned normalized format - FFX_SURFACE_FORMAT_R11G11B10_FLOAT, ///< 32 bit 3 channel float format - FFX_SURFACE_FORMAT_R16G16_FLOAT, ///< 16 bit per channel, 2 channel float format - FFX_SURFACE_FORMAT_R16G16_UINT, ///< 16 bit per channel, 2 channel unsigned int format - FFX_SURFACE_FORMAT_R16_FLOAT, ///< 16 bit per channel, 1 channel float format - FFX_SURFACE_FORMAT_R16_UINT, ///< 16 bit per channel, 1 channel unsigned int format - FFX_SURFACE_FORMAT_R16_UNORM, ///< 16 bit per channel, 1 channel unsigned normalized format - FFX_SURFACE_FORMAT_R16_SNORM, ///< 16 bit per 
channel, 1 channel signed normalized format - FFX_SURFACE_FORMAT_R8_UNORM, ///< 8 bit per channel, 1 channel unsigned normalized format - FFX_SURFACE_FORMAT_R8_UINT, ///< 8 bit per channel, 1 channel unsigned int format - FFX_SURFACE_FORMAT_R8G8_UNORM, ///< 8 bit per channel, 2 channel unsigned normalized format - FFX_SURFACE_FORMAT_R32_FLOAT ///< 32 bit per channel, 1 channel float format -} FfxSurfaceFormat; - -/// An enumeration of resource usage. -typedef enum FfxResourceUsage { - - FFX_RESOURCE_USAGE_READ_ONLY = 0, ///< No usage flags indicate a resource is read only. - FFX_RESOURCE_USAGE_RENDERTARGET = (1<<0), ///< Indicates a resource will be used as render target. - FFX_RESOURCE_USAGE_UAV = (1<<1), ///< Indicates a resource will be used as UAV. -} FfxResourceUsage; - -/// An enumeration of resource states. -typedef enum FfxResourceStates { - - FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1<<0), ///< Indicates a resource is in the state to be used as UAV. - FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 1), ///< Indicates a resource is in the state to be read by compute shaders. - FFX_RESOURCE_STATE_COPY_SRC = (1 << 2), ///< Indicates a resource is in the state to be used as source in a copy command. - FFX_RESOURCE_STATE_COPY_DEST = (1 << 3), ///< Indicates a resource is in the state to be used as destination in a copy command. - FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ), ///< Indicates a resource is in generic (slow) read state. -} FfxResourceStates; - -/// An enumeration of surface dimensions. -typedef enum FfxResourceDimension { - - FFX_RESOURCE_DIMENSION_TEXTURE_1D, ///< A resource with a single dimension. - FFX_RESOURCE_DIMENSION_TEXTURE_2D, ///< A resource with two dimensions. -} FfxResourceDimension; - -/// An enumeration of surface dimensions. -typedef enum FfxResourceFlags { - - FFX_RESOURCE_FLAGS_NONE = 0, ///< No flags. 
- FFX_RESOURCE_FLAGS_ALIASABLE = (1<<0), ///< A bit indicating a resource does not need to persist across frames. -} FfxResourceFlags; - -/// An enumeration of all resource view types. -typedef enum FfxResourceViewType { - - FFX_RESOURCE_VIEW_UNORDERED_ACCESS, ///< The resource view is an unordered access view (UAV). - FFX_RESOURCE_VIEW_SHADER_READ, ///< The resource view is a shader resource view (SRV). -} FfxResourceViewType; - -/// The type of filtering to perform when reading a texture. -typedef enum FfxFilterType { - - FFX_FILTER_TYPE_POINT, ///< Point sampling. - FFX_FILTER_TYPE_LINEAR ///< Sampling with interpolation. -} FfxFilterType; - -/// An enumeration of all supported shader models. -typedef enum FfxShaderModel { - - FFX_SHADER_MODEL_5_1, ///< Shader model 5.1. - FFX_SHADER_MODEL_6_0, ///< Shader model 6.0. - FFX_SHADER_MODEL_6_1, ///< Shader model 6.1. - FFX_SHADER_MODEL_6_2, ///< Shader model 6.2. - FFX_SHADER_MODEL_6_3, ///< Shader model 6.3. - FFX_SHADER_MODEL_6_4, ///< Shader model 6.4. - FFX_SHADER_MODEL_6_5, ///< Shader model 6.5. - FFX_SHADER_MODEL_6_6, ///< Shader model 6.6. - FFX_SHADER_MODEL_6_7, ///< Shader model 6.7. -} FfxShaderModel; - -// An enumeration for different resource types -typedef enum FfxResourceType { - - FFX_RESOURCE_TYPE_BUFFER, ///< The resource is a buffer. - FFX_RESOURCE_TYPE_TEXTURE1D, ///< The resource is a 1-dimensional texture. - FFX_RESOURCE_TYPE_TEXTURE2D, ///< The resource is a 2-dimensional texture. - FFX_RESOURCE_TYPE_TEXTURE3D, ///< The resource is a 3-dimensional texture. -} FfxResourceType; - -/// An enumeration for different heap types -typedef enum FfxHeapType { - - FFX_HEAP_TYPE_DEFAULT = 0, ///< Local memory. - FFX_HEAP_TYPE_UPLOAD ///< Heap used for uploading resources. -} FfxHeapType; - -/// An enumberation for different render job types -typedef enum FfxGpuJobType { - - FFX_GPU_JOB_CLEAR_FLOAT = 0, ///< The GPU job is performing a floating-point clear. 
- FFX_GPU_JOB_COPY = 1, ///< The GPU job is performing a copy. - FFX_GPU_JOB_COMPUTE = 2, ///< The GPU job is performing a compute dispatch. -} FfxGpuJobType; - -/// A typedef representing the graphics device. -typedef void* FfxDevice; - -/// A typedef representing a command list or command buffer. -typedef void* FfxCommandList; - -/// A typedef for a root signature. -typedef void* FfxRootSignature; - -/// A typedef for a pipeline state object. -typedef void* FfxPipeline; - -/// A structure encapasulating a collection of device capabilities. -typedef struct FfxDeviceCapabilities { - - FfxShaderModel minimumSupportedShaderModel; ///< The minimum shader model supported by the device. - uint32_t waveLaneCountMin; ///< The minimum supported wavefront width. - uint32_t waveLaneCountMax; ///< The maximum supported wavefront width. - bool fp16Supported; ///< The device supports FP16 in hardware. - bool raytracingSupported; ///< The device supports raytracing. -} FfxDeviceCapabilities; - -/// A structure encapsulating a 2-dimensional point, using 32bit unsigned integers. -typedef struct FfxDimensions2D { - - uint32_t width; ///< The width of a 2-dimensional range. - uint32_t height; ///< The height of a 2-dimensional range. -} FfxDimensions2D; - -/// A structure encapsulating a 2-dimensional point, -typedef struct FfxIntCoords2D { - - int32_t x; ///< The x coordinate of a 2-dimensional point. - int32_t y; ///< The y coordinate of a 2-dimensional point. -} FfxIntCoords2D; - -/// A structure encapsulating a 2-dimensional set of floating point coordinates. -typedef struct FfxFloatCoords2D { - - float x; ///< The x coordinate of a 2-dimensional point. - float y; ///< The y coordinate of a 2-dimensional point. -} FfxFloatCoords2D; - -/// A structure describing a resource. -typedef struct FfxResourceDescription { - - FfxResourceType type; ///< The type of the resource. - FfxSurfaceFormat format; ///< The surface format. - uint32_t width; ///< The width of the resource. 
- uint32_t height; ///< The height of the resource. - uint32_t depth; ///< The depth of the resource. - uint32_t mipCount; ///< Number of mips (or 0 for full mipchain). - FfxResourceFlags flags; ///< A set of FfxResourceFlags flags. -} FfxResourceDescription; - -/// An outward facing structure containing a resource -typedef struct FfxResource { - void* resource; ///< pointer to the resource. - wchar_t name[64]; - FfxResourceDescription description; - FfxResourceStates state; - bool isDepth; - uint64_t descriptorData; -} FfxResource; - -/// An internal structure containing a handle to a resource and resource views -typedef struct FfxResourceInternal { - int32_t internalIndex; ///< The index of the resource. -} FfxResourceInternal; - - -/// A structure defining a resource bind point -typedef struct FfxResourceBinding -{ - uint32_t slotIndex; - uint32_t resourceIdentifier; - wchar_t name[64]; -}FfxResourceBinding; - -/// A structure encapsulating a single pass of an algorithm. -typedef struct FfxPipelineState { - - FfxRootSignature rootSignature; ///< The pipelines rootSignature - FfxPipeline pipeline; ///< The pipeline object - uint32_t uavCount; ///< Count of UAVs used in this pipeline - uint32_t srvCount; ///< Count of SRVs used in this pipeline - uint32_t constCount; ///< Count of constant buffers used in this pipeline - - FfxResourceBinding uavResourceBindings[FFX_MAX_NUM_UAVS]; ///< Array of ResourceIdentifiers bound as UAVs - FfxResourceBinding srvResourceBindings[FFX_MAX_NUM_SRVS]; ///< Array of ResourceIdentifiers bound as SRVs - FfxResourceBinding cbResourceBindings[FFX_MAX_NUM_CONST_BUFFERS]; ///< Array of ResourceIdentifiers bound as CBs -} FfxPipelineState; - -/// A structure containing the data required to create a resource. -typedef struct FfxCreateResourceDescription { - - FfxHeapType heapType; ///< The heap type to hold the resource, typically FFX_HEAP_TYPE_DEFAULT. - FfxResourceDescription resourceDescription; ///< A resource description. 
- FfxResourceStates initalState; ///< The initial resource state. - uint32_t initDataSize; ///< Size of initial data buffer. - void* initData; ///< Buffer containing data to fill the resource. - const wchar_t* name; ///< Name of the resource. - FfxResourceUsage usage; ///< Resource usage flags. - uint32_t id; ///< Internal resource ID. -} FfxCreateResourceDescription; - -/// A structure containing the description used to create a -/// FfxPipeline structure. -/// -/// A pipeline is the name given to a shader and the collection of state that -/// is required to dispatch it. In the context of FSR2 and its architecture -/// this means that a FfxPipelineDescription will map to either a -/// monolithic object in an explicit API (such as a -/// PipelineStateObject in DirectX 12). Or a shader and some -/// ancillary API objects (in something like DirectX 11). -/// -/// The contextFlags field contains a copy of the flags passed -/// to ffxFsr2ContextCreate via the flags field of -/// the FfxFsr2InitializationParams structure. These flags are -/// used to determine which permutation of a pipeline for a specific -/// FfxFsr2Pass should be used to implement the features required -/// by each application, as well as to acheive the best performance on specific -/// target hardware configurations. -/// -/// When using one of the provided backends for FSR2 (such as DirectX 12 or -/// Vulkan) the data required to create a pipeline is compiled offline and -/// included into the backend library that you are using. For cases where the -/// backend interface is overriden by providing custom callback function -/// implementations care should be taken to respect the contents of the -/// contextFlags field in order to correctly support the options -/// provided by FSR2, and acheive best performance. -/// -/// @ingroup FSR2 -typedef struct FfxPipelineDescription { - - uint32_t contextFlags; ///< A collection of FfxFsr2InitializationFlagBits which were passed to the context. 
- FfxFilterType* samplers; ///< Array of static samplers. - size_t samplerCount; ///< The number of samples contained inside samplers. - const uint32_t* rootConstantBufferSizes; ///< Array containing the sizes of the root constant buffers (count of 32 bit elements). - uint32_t rootConstantBufferCount; ///< The number of root constants contained within rootConstantBufferSizes. -} FfxPipelineDescription; - -/// A structure containing a constant buffer. -typedef struct FfxConstantBuffer { - - uint32_t uint32Size; ///< Size of 32 bit chunks used in the constant buffer - uint32_t data[FFX_MAX_CONST_SIZE]; ///< Constant buffer data -}FfxConstantBuffer; - -/// A structure describing a clear render job. -typedef struct FfxClearFloatJobDescription { - - float color[4]; ///< The clear color of the resource. - FfxResourceInternal target; ///< The resource to be cleared. -} FfxClearFloatJobDescription; - -/// A structure describing a compute render job. -typedef struct FfxComputeJobDescription { - - FfxPipelineState pipeline; ///< Compute pipeline for the render job. - uint32_t dimensions[3]; ///< Dispatch dimensions. - FfxResourceInternal srvs[FFX_MAX_NUM_SRVS]; ///< SRV resources to be bound in the compute job. - wchar_t srvNames[FFX_MAX_NUM_SRVS][64]; - FfxResourceInternal uavs[FFX_MAX_NUM_UAVS]; ///< UAV resources to be bound in the compute job. - uint32_t uavMip[FFX_MAX_NUM_UAVS]; ///< Mip level of UAV resources to be bound in the compute job. - wchar_t uavNames[FFX_MAX_NUM_UAVS][64]; - FfxConstantBuffer cbs[FFX_MAX_NUM_CONST_BUFFERS]; ///< Constant buffers to be bound in the compute job. - wchar_t cbNames[FFX_MAX_NUM_CONST_BUFFERS][64]; - uint32_t cbSlotIndex[FFX_MAX_NUM_CONST_BUFFERS]; ///< Slot index in the descriptor table -} FfxComputeJobDescription; - -/// A structure describing a copy render job. -typedef struct FfxCopyJobDescription -{ - FfxResourceInternal src; ///< Source resource for the copy. - FfxResourceInternal dst; ///< Destination resource for the copy. 
-} FfxCopyJobDescription; - -/// A structure describing a single render job. -typedef struct FfxGpuJobDescription{ - - FfxGpuJobType jobType; ///< Type of the job. - - union { - FfxClearFloatJobDescription clearJobDescriptor; ///< Clear job descriptor. Valid when jobType is FFX_RENDER_JOB_CLEAR_FLOAT. - FfxCopyJobDescription copyJobDescriptor; ///< Copy job descriptor. Valid when jobType is FFX_RENDER_JOB_COPY. - FfxComputeJobDescription computeJobDescriptor; ///< Compute job descriptor. Valid when jobType is FFX_RENDER_JOB_COMPUTE. - }; -} FfxGpuJobDescription; - -#ifdef __cplusplus -} -#endif // #ifdef __cplusplus diff --git a/thirdparty/amd-fsr2/ffx_util.h b/thirdparty/amd-fsr2/ffx_util.h deleted file mode 100644 index ca4324ea832e..000000000000 --- a/thirdparty/amd-fsr2/ffx_util.h +++ /dev/null @@ -1,78 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#pragma once - -#include "ffx_types.h" - -/// The value of Pi. -const float FFX_PI = 3.141592653589793f; - -/// An epsilon value for floating point numbers. -const float FFX_EPSILON = 1e-06f; - -/// Helper macro to create the version number. -#define FFX_MAKE_VERSION(major, minor, patch) ((major << 22) | (minor << 12) | patch) - -///< Use this to specify no version. -#define FFX_UNSPECIFIED_VERSION 0xFFFFAD00 - -/// Helper macro to avoid warnings about unused variables. -#define FFX_UNUSED(x) ((void)(x)) - -/// Helper macro to align an integer to the specified power of 2 boundary -#define FFX_ALIGN_UP(x, y) (((x) + ((y)-1)) & ~((y)-1)) - -/// Helper macro to check if a value is aligned. -#define FFX_IS_ALIGNED(x) (((x) != 0) && ((x) & ((x)-1))) - -/// Helper macro to stringify a value. -#define FFX_STR(s) FFX_XSTR(s) -#define FFX_XSTR(s) #s - -/// Helper macro to forward declare a structure. -#define FFX_FORWARD_DECLARE(x) typedef struct x x - -/// Helper macro to return the maximum of two values. -#define FFX_MAXIMUM(x, y) (((x) > (y)) ? (x) : (y)) - -/// Helper macro to return the minimum of two values. -#define FFX_MINIMUM(x, y) (((x) < (y)) ? (x) : (y)) - -/// Helper macro to do safe free on a pointer. -#define FFX_SAFE_FREE(x) \ - if (x) \ - free(x) - -/// Helper macro to return the abs of an integer value. -#define FFX_ABSOLUTE(x) (((x) < 0) ? (-(x)) : (x)) - -/// Helper macro to return sign of a value. -#define FFX_SIGN(x) (((x) < 0) ? -1 : 1) - -/// Helper macro to work out the number of elements in an array. 
-#define FFX_ARRAY_ELEMENTS(x) (int32_t)((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x]))))) - -/// The maximum length of a path that can be specified to the FidelityFX API. -#define FFX_MAXIMUM_PATH (260) - -/// Helper macro to check if the specified key is set in a bitfield. -#define FFX_CONTAINS_FLAG(options, key) ((options & key) == key) diff --git a/thirdparty/amd-fsr2/patches/0001-build-fixes.patch b/thirdparty/amd-fsr2/patches/0001-build-fixes.patch deleted file mode 100644 index 0cf64770e149..000000000000 --- a/thirdparty/amd-fsr2/patches/0001-build-fixes.patch +++ /dev/null @@ -1,136 +0,0 @@ -diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-fsr2/ffx_fsr2.cpp -index 051018e437..3970aa7f5b 100644 ---- a/thirdparty/amd-fsr2/ffx_fsr2.cpp -+++ b/thirdparty/amd-fsr2/ffx_fsr2.cpp -@@ -36,6 +36,15 @@ - #pragma clang diagnostic ignored "-Wunused-variable" - #endif - -+#ifndef _countof -+#define _countof(array) (sizeof(array) / sizeof(array[0])) -+#endif -+ -+#ifndef _MSC_VER -+#include -+#define wcscpy_s wcscpy -+#endif -+ - // max queued frames for descriptor management - static const uint32_t FSR2_MAX_QUEUED_FRAMES = 16; - -diff --git a/thirdparty/amd-fsr2/ffx_types.h b/thirdparty/amd-fsr2/ffx_types.h -index 74edd192c4..f71b259cce 100644 ---- a/thirdparty/amd-fsr2/ffx_types.h -+++ b/thirdparty/amd-fsr2/ffx_types.h -@@ -22,6 +22,7 @@ - #pragma once - - #include -+#include - - #if defined (FFX_GCC) - /// FidelityFX exported functions -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl -index ebbe610ffa..31d68292d4 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. 
- --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl -index 7ae41cf0c1..3b86c17d4d 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. - --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl -index 15186e3bb6..8439c4e9d4 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. - --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl -index fcb2b76528..45ec5bdb86 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. 
- --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl -index f7cad59c20..7c3a4c2740 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. - --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl -index f0823c2bc8..8b4ebc6afc 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. - --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl -index 20e17eef8c..be4395aaed 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. 
- --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl -index bebca91099..7d6a66b8ac 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl -@@ -19,7 +19,7 @@ - // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - // THE SOFTWARE. - --#version 450 -+//#version 450 - - #extension GL_GOOGLE_include_directive : require - #extension GL_EXT_samplerless_texture_functions : require diff --git a/thirdparty/amd-fsr2/patches/0002-godot-fsr2-options.patch b/thirdparty/amd-fsr2/patches/0002-godot-fsr2-options.patch deleted file mode 100644 index dd98f0ece529..000000000000 --- a/thirdparty/amd-fsr2/patches/0002-godot-fsr2-options.patch +++ /dev/null @@ -1,121 +0,0 @@ -diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-fsr2/ffx_fsr2.cpp -index 3970aa7f5b..ec571b9cd2 100644 ---- a/thirdparty/amd-fsr2/ffx_fsr2.cpp -+++ b/thirdparty/amd-fsr2/ffx_fsr2.cpp -@@ -952,6 +952,8 @@ static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2D - context->constants.lumaMipDimensions[0] = uint32_t(context->constants.maxRenderSize[0] / mipDiv); - context->constants.lumaMipDimensions[1] = uint32_t(context->constants.maxRenderSize[1] / mipDiv); - -+ memcpy(context->constants.reprojectionMatrix, params->reprojectionMatrix, sizeof(context->constants.reprojectionMatrix)); -+ - // reactive mask bias - const int32_t threadGroupWorkRegionDim = 8; - const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim; -diff --git a/thirdparty/amd-fsr2/ffx_fsr2.h b/thirdparty/amd-fsr2/ffx_fsr2.h -index 2a1c74abb1..dfcd4caf35 100644 ---- a/thirdparty/amd-fsr2/ffx_fsr2.h -+++ 
b/thirdparty/amd-fsr2/ffx_fsr2.h -@@ -146,6 +146,7 @@ typedef struct FfxFsr2DispatchDescription { - float autoReactiveScale; ///< A value to scale the reactive mask - float autoReactiveMax; ///< A value to clamp the reactive mask - -+ float reprojectionMatrix[16]; ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth. - } FfxFsr2DispatchDescription; - - /// A structure encapsulating the parameters for automatic generation of a reactive mask -diff --git a/thirdparty/amd-fsr2/ffx_fsr2_private.h b/thirdparty/amd-fsr2/ffx_fsr2_private.h -index 6b5fbc5117..8a9aec5778 100644 ---- a/thirdparty/amd-fsr2/ffx_fsr2_private.h -+++ b/thirdparty/amd-fsr2/ffx_fsr2_private.h -@@ -44,6 +44,9 @@ typedef struct Fsr2Constants { - float deltaTime; - float dynamicResChangeFactor; - float viewSpaceToMetersFactor; -+ -+ float pad; -+ float reprojectionMatrix[16]; - } Fsr2Constants; - - struct FfxFsr2ContextDescription; -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl -index 31d68292d4..2e98c8a6c5 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl -@@ -35,7 +35,7 @@ - #endif - #define FSR2_BIND_SRV_INTERNAL_UPSCALED 3 - #define FSR2_BIND_SRV_LOCK_STATUS 4 --#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5 -+//#define FSR2_BIND_SRV_INPUT_DEPTH_CLIP 5 - #define FSR2_BIND_SRV_PREPARED_INPUT_COLOR 6 - #define FSR2_BIND_SRV_LUMA_INSTABILITY 7 - #define FSR2_BIND_SRV_LANCZOS_LUT 8 -@@ -52,6 +52,10 @@ - - #define FSR2_BIND_CB_FSR2 18 - -+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS -+#define FSR2_BIND_SRV_INPUT_DEPTH 5 -+#endif -+ - #include "ffx_fsr2_callbacks_glsl.h" - #include "ffx_fsr2_common.h" - #include "ffx_fsr2_sample.h" -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h -index 10da13fb81..b610037cc6 100644 
---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h -@@ -52,6 +52,9 @@ - FfxFloat32 fDeltaTime; - FfxFloat32 fDynamicResChangeFactor; - FfxFloat32 fViewSpaceToMetersFactor; -+ -+ FfxFloat32 fPad; -+ mat4 mReprojectionMatrix; - } cbFSR2; - #endif - -@@ -317,7 +320,11 @@ FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos) - #if defined(FSR2_BIND_SRV_REACTIVE_MASK) - FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) - { -+#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP -+ return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f); -+#else - return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; -+#endif - } - #endif - -@@ -354,6 +361,16 @@ FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) - { - FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy; - -+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS -+ bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))); -+ if (bInvalidMotionVector) -+ { -+ FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos); -+ FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize(); -+ fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix); -+ } -+#endif -+ - FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale(); - - #if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS -diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl -index 7d6a66b8ac..5c042c332a 100644 ---- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl -+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl -@@ -40,6 +40,10 @@ - #define FSR2_BIND_CB_FSR2 11 - #define FSR2_BIND_CB_REACTIVE 12 - -+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS -+#define FSR2_BIND_SRV_INPUT_DEPTH 13 -+#endif 
-+ - #include "ffx_fsr2_callbacks_glsl.h" - #include "ffx_fsr2_common.h" - diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl deleted file mode 100644 index 40232155c32f..000000000000 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl +++ /dev/null @@ -1,134 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -//#version 450 - -#extension GL_GOOGLE_include_directive : require -#extension GL_EXT_samplerless_texture_functions : require - -#define FSR2_BIND_SRV_INPUT_COLOR 0 -#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 1 -#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE 2 -#define FSR2_BIND_UAV_EXPOSURE_MIP_5 3 -#define FSR2_BIND_UAV_AUTO_EXPOSURE 4 -#define FSR2_BIND_CB_FSR2 5 -#define FSR2_BIND_CB_SPD 6 - -#include "ffx_fsr2_callbacks_glsl.h" -#include "ffx_fsr2_common.h" - -#if defined(FSR2_BIND_CB_SPD) - layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t - { - uint mips; - uint numWorkGroups; - uvec2 workGroupOffset; - uvec2 renderSize; - } cbSPD; - - uint MipCount() - { - return cbSPD.mips; - } - - uint NumWorkGroups() - { - return cbSPD.numWorkGroups; - } - - uvec2 WorkGroupOffset() - { - return cbSPD.workGroupOffset; - } - - uvec2 SPD_RenderSize() - { - return cbSPD.renderSize; - } -#endif - -vec2 SPD_LoadExposureBuffer() -{ - return imageLoad(rw_auto_exposure, ivec2(0,0)).xy; -} - -void SPD_SetExposureBuffer(vec2 value) -{ - imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f)); -} - -vec4 SPD_LoadMipmap5(ivec2 iPxPos) -{ - return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f); -} - -void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value) -{ - switch (slice) - { - case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL: - imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f)); - break; - case 5: - imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f)); - break; - default: - - // avoid flattened side effect -#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE) - imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f)); -#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5) - imageStore(rw_img_mip_5, iPxPos, vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f)); -#endif - break; - } -} - -void SPD_IncreaseAtomicCounter(inout uint spdCounter) -{ - spdCounter = 
imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1); -} - -void SPD_ResetAtomicCounter() -{ - imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0)); -} - -#include "ffx_fsr2_compute_luminance_pyramid.h" - -#ifndef FFX_FSR2_THREAD_GROUP_WIDTH -#define FFX_FSR2_THREAD_GROUP_WIDTH 256 -#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH -#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT -#define FFX_FSR2_THREAD_GROUP_HEIGHT 1 -#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT -#ifndef FFX_FSR2_THREAD_GROUP_DEPTH -#define FFX_FSR2_THREAD_GROUP_DEPTH 1 -#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH -#ifndef FFX_FSR2_NUM_THREADS -#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in; -#endif // #ifndef FFX_FSR2_NUM_THREADS - -FFX_FSR2_NUM_THREADS -void main() -{ - ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex); -} \ No newline at end of file diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h deleted file mode 100644 index 3bd4d5d91290..000000000000 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h +++ /dev/null @@ -1 +0,0 @@ -// This file doesn't exist in this version of FSR. \ No newline at end of file diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h deleted file mode 100644 index 3bd4d5d91290..000000000000 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h +++ /dev/null @@ -1 +0,0 @@ -// This file doesn't exist in this version of FSR. \ No newline at end of file diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h deleted file mode 100644 index d9006cd8ee00..000000000000 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h +++ /dev/null @@ -1,67 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. 
All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -#define GROUP_SIZE 8 - -#define FSR_RCAS_DENOISE 1 - -void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor) -{ - StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor); -} - -#define FSR_RCAS_F -FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p) -{ - FfxFloat32x4 fColor = LoadRCAS_Input(p); - - fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure()); - - return fColor; -} - -void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {} - -#include "ffx_fsr1.h" - - -void CurrFilter(FFX_MIN16_U2 pos) -{ - FfxFloat32x3 c; - FsrRcasF(c.r, c.g, c.b, pos, RCASConfig()); - - c = UnprepareRgb(c, Exposure()); - - WriteUpscaledOutput(pos, c); -} - -void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid) -{ - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. 
- FfxUInt32x2 gxy = ffxRemapForQuad(LocalThreadId.x) + FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u); - CurrFilter(FFX_MIN16_U2(gxy)); - gxy.x += 8u; - CurrFilter(FFX_MIN16_U2(gxy)); - gxy.y += 8u; - CurrFilter(FFX_MIN16_U2(gxy)); - gxy.x -= 8u; - CurrFilter(FFX_MIN16_U2(gxy)); -} diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h deleted file mode 100644 index abdb8888a9a4..000000000000 --- a/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h +++ /dev/null @@ -1,194 +0,0 @@ -// This file is part of the FidelityFX SDK. -// -// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. 
- -#ifndef FFX_FSR2_UPSAMPLE_H -#define FFX_FSR2_UPSAMPLE_H - -FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; - -void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) -{ - fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); -} -#if FFX_HALF -void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) -{ - fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax); -} -#endif - -#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE -#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate -#endif - -FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) -{ - FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; -#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE - FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT - FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE - FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#else -#error "Invalid Lanczos type" -#endif - return fSampleWeight; -} - -#if FFX_HALF -FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) -{ - FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; -#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE - FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT - FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased)); -#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE - FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, 
fSrcSampleOffsetBiased)); - - // To Test: Save reciproqual sqrt compute - // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); -#else -#error "Invalid Lanczos type" -#endif - return fSampleWeight; -} -#endif - -FfxFloat32 ComputeMaxKernelWeight() { - const FfxFloat32 fKernelSizeBias = 1.0f; - - FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias); - - return ffxMin(FfxFloat32(1.99f), fKernelWeight); -} - -FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params, - FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor) -{ - #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr2_force16_begin.h" - #endif - // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporaly) - FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f); // Destination resolution output pixel center position - FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor(); // Source resolution output pixel center position - FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos)); // TODO: what about weird upscale factors... - - #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr2_force16_end.h" - #endif - - FfxFloat32x3 fSamples[iLanczos2SampleCount]; - - FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0 - - FfxInt32x2 offsetTL; - offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1); - offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1); - - //Load samples - // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3]. 
- // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox. - // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values. - const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y; - const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x; - - FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL); - - FFX_UNROLL - for (FfxInt32 row = 0; row < 3; row++) { - - FFX_UNROLL - for (FfxInt32 col = 0; col < 3; col++) { - FfxInt32 iSampleIndex = col + (row << 2); - - FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow; - - const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize())); - - fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord)); - } - } - - FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f); - - FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos); - - // Identify how much of each upsampled color to be used for this frame - const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); - const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor); - - const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f)); - const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor)); - const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); - - const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); - - FFX_UNROLL - for (FfxInt32 row = 0; row < 3; row++) { - FFX_UNROLL - for (FfxInt32 col = 0; col < 3; col++) { - FfxInt32 iSampleIndex = col + (row << 
2); - - const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); - const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow); - FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset; - - FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow; - - const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); - FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias)); - - fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight); - - // Update rectification box - { - const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset); - const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); - - const FfxBoolean bInitialSample = (row == 0) && (col == 0); - RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight); - } - } - } - - RectificationBoxComputeVarianceBoxData(clippingBox); - - fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); - - if (fColorAndWeight.w > FSR2_EPSILON) { - // Normalize for deringing (we need to compare colors) - fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w; - fColorAndWeight.w *= fUpsampleLanczosWeightScale; - - Deringing(clippingBox, fColorAndWeight.xyz); - } - - #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF - #include "ffx_fsr2_force16_end.h" - #endif - - return fColorAndWeight; -} - -#endif //!defined( FFX_FSR2_UPSAMPLE_H )