Skip to content

Commit db686c9

Browse files
authored
Merge pull request #114746 from akien-mga/4.5-cherrypicks-rendering
[4.5] Cherry-picks for the 4.5 branch (future 4.5.2) - 1st batch (rendering)
2 parents 1bad148 + 23e3457 commit db686c9

56 files changed

Lines changed: 474 additions & 276 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

drivers/d3d12/rendering_device_driver_d3d12.cpp

Lines changed: 7 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1573,12 +1573,6 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p
15731573
tex_info->view_descs.srv = srv_desc;
15741574
tex_info->view_descs.uav = uav_desc;
15751575

1576-
if (!barrier_capabilities.enhanced_barriers_supported && (p_format.usage_bits & (TEXTURE_USAGE_STORAGE_BIT | TEXTURE_USAGE_COLOR_ATTACHMENT_BIT))) {
1577-
// Fallback to clear resources when they're first used in a uniform set. Not necessary if enhanced barriers
1578-
// are supported, as the discard flag will be used instead when transitioning from an undefined layout.
1579-
textures_pending_clear.add(&tex_info->pending_clear);
1580-
}
1581-
15821576
return TextureID(tex_info);
15831577
}
15841578

@@ -3055,7 +3049,7 @@ D3D12_UNORDERED_ACCESS_VIEW_DESC RenderingDeviceDriverD3D12::_make_ranged_uav_fo
30553049
} break;
30563050
case D3D12_UAV_DIMENSION_TEXTURE3D: {
30573051
uav_desc.Texture3D.MipSlice = mip;
3058-
uav_desc.Texture3D.WSize >>= p_mipmap_offset;
3052+
uav_desc.Texture3D.WSize = MAX(uav_desc.Texture3D.WSize >> p_mipmap_offset, 1U);
30593053
} break;
30603054
default:
30613055
break;
@@ -3643,21 +3637,6 @@ void RenderingDeviceDriverD3D12::command_uniform_set_prepare_for_use(CommandBuff
36433637
return;
36443638
}
36453639

3646-
// Perform pending blackouts.
3647-
{
3648-
SelfList<TextureInfo> *E = textures_pending_clear.first();
3649-
while (E) {
3650-
TextureSubresourceRange subresources;
3651-
subresources.layer_count = E->self()->layers;
3652-
subresources.mipmap_count = E->self()->mipmaps;
3653-
command_clear_color_texture(p_cmd_buffer, TextureID(E->self()), TEXTURE_LAYOUT_UNDEFINED, Color(), subresources);
3654-
3655-
SelfList<TextureInfo> *next = E->next();
3656-
E->remove_from_list();
3657-
E = next;
3658-
}
3659-
}
3660-
36613640
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
36623641
const UniformSetInfo *uniform_set_info = (const UniformSetInfo *)p_uniform_set.id;
36633642
const ShaderInfo *shader_info_in = (const ShaderInfo *)p_shader.id;
@@ -4571,8 +4550,7 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
45714550
p_rect.position.y,
45724551
p_rect.position.x + p_rect.size.x,
45734552
p_rect.position.y + p_rect.size.y);
4574-
cmd_buf_info->render_pass_state.region_is_all = !(
4575-
cmd_buf_info->render_pass_state.region_rect.left == 0 &&
4553+
cmd_buf_info->render_pass_state.region_is_all = (cmd_buf_info->render_pass_state.region_rect.left == 0 &&
45764554
cmd_buf_info->render_pass_state.region_rect.top == 0 &&
45774555
cmd_buf_info->render_pass_state.region_rect.right == fb_info->size.x &&
45784556
cmd_buf_info->render_pass_state.region_rect.bottom == fb_info->size.y);
@@ -4616,7 +4594,6 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
46164594
if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) {
46174595
clear.aspect.set_flag(TEXTURE_ASPECT_COLOR_BIT);
46184596
clear.color_attachment = i;
4619-
tex_info->pending_clear.remove_from_list();
46204597
}
46214598
} else if ((tex_info->desc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)) {
46224599
if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_CLEAR) {
@@ -5799,6 +5776,8 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
57995776
return true;
58005777
case API_TRAIT_BUFFERS_REQUIRE_TRANSITIONS:
58015778
return !barrier_capabilities.enhanced_barriers_supported;
5779+
case API_TRAIT_TEXTURE_OUTPUTS_REQUIRE_CLEARS:
5780+
return true;
58025781
default:
58035782
return RenderingDeviceDriver::api_trait_get(p_trait);
58045783
}
@@ -5807,7 +5786,7 @@ uint64_t RenderingDeviceDriverD3D12::api_trait_get(ApiTrait p_trait) {
58075786
bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
58085787
switch (p_feature) {
58095788
case SUPPORTS_HALF_FLOAT:
5810-
return shader_capabilities.native_16bit_ops && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported;
5789+
return shader_capabilities.native_16bit_ops;
58115790
case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
58125791
return true;
58135792
case SUPPORTS_BUFFER_DEVICE_ADDRESS:
@@ -6037,7 +6016,6 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
60376016
subgroup_capabilities.wave_ops_supported = false;
60386017
shader_capabilities.shader_model = (D3D_SHADER_MODEL)0;
60396018
shader_capabilities.native_16bit_ops = false;
6040-
storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = false;
60416019
format_capabilities.relaxed_casting_supported = false;
60426020

60436021
{
@@ -6078,9 +6056,8 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() {
60786056

60796057
D3D12_FEATURE_DATA_D3D12_OPTIONS options = {};
60806058
res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
6081-
if (SUCCEEDED(res)) {
6082-
storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = options.TypedUAVLoadAdditionalFormats;
6083-
}
6059+
ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ERR_UNAVAILABLE, "CheckFeatureSupport failed with error " + vformat("0x%08ux", (uint64_t)res) + ".");
6060+
ERR_FAIL_COND_V_MSG(!options.TypedUAVLoadAdditionalFormats, ERR_UNAVAILABLE, "No support for Typed UAV Load Additional Formats has been found.");
60846061

60856062
D3D12_FEATURE_DATA_D3D12_OPTIONS1 options1 = {};
60866063
res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS1, &options1, sizeof(options1));

drivers/d3d12/rendering_device_driver_d3d12.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
114114
bool native_16bit_ops = false;
115115
};
116116

117-
struct StorageBufferCapabilities {
118-
bool storage_buffer_16_bit_access_is_supported = false;
119-
};
120-
121117
struct FormatCapabilities {
122118
bool relaxed_casting_supported = false;
123119
};
@@ -143,7 +139,6 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
143139
FragmentShadingRateCapabilities fsr_capabilities;
144140
FragmentDensityMapCapabilities fdm_capabilities;
145141
ShaderCapabilities shader_capabilities;
146-
StorageBufferCapabilities storage_buffer_capabilities;
147142
FormatCapabilities format_capabilities;
148143
BarrierCapabilities barrier_capabilities;
149144
MiscFeaturesSupport misc_features_support;
@@ -360,12 +355,10 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
360355
TextureInfo *main_texture = nullptr;
361356

362357
UINT mapped_subresource = UINT_MAX;
363-
SelfList<TextureInfo> pending_clear{ this };
364358
#ifdef DEBUG_ENABLED
365359
bool created_from_extension = false;
366360
#endif
367361
};
368-
SelfList<TextureInfo>::List textures_pending_clear;
369362

370363
HashMap<DXGI_FORMAT, uint32_t> format_sample_counts_mask_cache;
371364
Mutex format_sample_counts_mask_cache_mutex;

drivers/d3d12/rendering_shader_container_d3d12.cpp

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -99,28 +99,26 @@ uint32_t RenderingDXIL::patch_specialization_constant(
9999
const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES],
100100
HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_stages_bytecodes,
101101
bool p_is_first_patch) {
102-
uint32_t patch_val = 0;
102+
int64_t patch_val = 0;
103103
switch (p_type) {
104104
case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT: {
105-
uint32_t int_value = *((const int *)p_value);
106-
ERR_FAIL_COND_V(int_value & (1 << 31), 0);
107-
patch_val = int_value;
105+
patch_val = *((const int32_t *)p_value);
108106
} break;
109107
case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL: {
110108
bool bool_value = *((const bool *)p_value);
111-
patch_val = (uint32_t)bool_value;
109+
patch_val = (int32_t)bool_value;
112110
} break;
113111
case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT: {
114-
uint32_t int_value = *((const int *)p_value);
115-
ERR_FAIL_COND_V(int_value & (1 << 31), 0);
116-
patch_val = (int_value >> 1);
112+
patch_val = *((const int32_t *)p_value);
117113
} break;
118114
}
119-
// For VBR encoding to encode the number of bits we expect (32), we need to set the MSB unconditionally.
120-
// However, signed VBR moves the MSB to the LSB, so setting the MSB to 1 wouldn't help. Therefore,
121-
// the bit we set to 1 is the one at index 30.
122-
patch_val |= (1 << 30);
123-
patch_val <<= 1; // What signed VBR does.
115+
116+
// Encode to signed VBR.
117+
if (patch_val >= 0) {
118+
patch_val <<= 1;
119+
} else {
120+
patch_val = ((-patch_val) << 1) | 1;
121+
}
124122

125123
auto tamper_bits = [](uint8_t *p_start, uint64_t p_bit_offset, uint64_t p_tb_value) -> uint64_t {
126124
uint64_t original = 0;
@@ -174,13 +172,13 @@ uint32_t RenderingDXIL::patch_specialization_constant(
174172

175173
Vector<uint8_t> &bytecode = r_stages_bytecodes[(RenderingDeviceCommons::ShaderStage)stage];
176174
#ifdef DEV_ENABLED
177-
uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val);
175+
uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, (uint64_t)patch_val);
178176
// Checking against the value the NIR patch should have set.
179177
DEV_ASSERT(!p_is_first_patch || ((orig_patch_val >> 1) & GODOT_NIR_SC_SENTINEL_MAGIC_MASK) == GODOT_NIR_SC_SENTINEL_MAGIC);
180-
uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val);
181-
DEV_ASSERT(readback_patch_val == patch_val);
178+
uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, (uint64_t)patch_val);
179+
DEV_ASSERT(readback_patch_val == (uint64_t)patch_val);
182180
#else
183-
tamper_bits(bytecode.ptrw(), offset, patch_val);
181+
tamper_bits(bytecode.ptrw(), offset, (uint64_t)patch_val);
184182
#endif
185183

186184
stages_patched_mask |= (1 << stage);
@@ -319,10 +317,6 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(const Vector<Rendering
319317

320318
ERR_FAIL_NULL_V_MSG(shader, false, "Shader translation (step 1) at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed.");
321319

322-
#ifdef DEV_ENABLED
323-
nir_validate_shader(shader, "Validate before feeding NIR to the DXIL compiler");
324-
#endif
325-
326320
if (stage == RenderingDeviceCommons::SHADER_STAGE_VERTEX) {
327321
dxil_runtime_conf.yz_flip.y_mask = 0xffff;
328322
dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
@@ -621,9 +615,7 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
621615
D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
622616
D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS |
623617
D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS |
624-
D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS |
625-
D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS |
626-
D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS;
618+
D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS;
627619

628620
if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT)) {
629621
root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS;

drivers/gles3/rasterizer_gles3.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ void RasterizerGLES3::_blit_render_target_to_screen(DisplayServer::WindowID p_sc
410410
}
411411
#endif
412412

413-
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, GLES3::TextureStorage::system_fbo);
413+
glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo);
414414

415415
if (p_first) {
416416
if (p_blit.dst_rect.position != Vector2() || p_blit.dst_rect.size != rt->size) {

drivers/gles3/shaders/scene.glsl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,12 +1022,6 @@ uniform highp mat4 world_transform;
10221022
uniform highp uint instance_offset;
10231023
uniform highp uint model_flags;
10241024

1025-
/* clang-format off */
1026-
1027-
#GLOBALS
1028-
1029-
/* clang-format on */
1030-
10311025
#define LIGHT_BAKE_DISABLED 0u
10321026
#define LIGHT_BAKE_STATIC 1u
10331027
#define LIGHT_BAKE_DYNAMIC 2u
@@ -1268,6 +1262,12 @@ layout(location = 0) out vec4 frag_color;
12681262

12691263
#endif // !RENDER_MATERIAL
12701264

1265+
/* clang-format off */
1266+
1267+
#GLOBALS
1268+
1269+
/* clang-format on */
1270+
12711271
vec3 F0(float metallic, float specular, vec3 albedo) {
12721272
float dielectric = 0.16 * specular * specular;
12731273
// use albedo * metallic as colored specular reflectance at 0 angle for metallic materials;

drivers/gles3/storage/config.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ Config::Config() {
232232
//https://github.com/godotengine/godot/issues/92662#issuecomment-2161199477
233233
//disable_particles_workaround = false;
234234
}
235-
} else if (rendering_device_name == "PowerVR Rogue GE8320") {
235+
} else if (rendering_device_name.contains("PowerVR")) {
236236
disable_transform_feedback_shader_cache = true;
237237
}
238238

drivers/gles3/storage/light_storage.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,14 @@ AABB LightStorage::light_get_aabb(RID p_light) const {
350350
switch (light->type) {
351351
case RS::LIGHT_SPOT: {
352352
float len = light->param[RS::LIGHT_PARAM_RANGE];
353-
float size = Math::tan(Math::deg_to_rad(light->param[RS::LIGHT_PARAM_SPOT_ANGLE])) * len;
353+
float angle = Math::deg_to_rad(light->param[RS::LIGHT_PARAM_SPOT_ANGLE]);
354+
355+
if (angle > Math::PI * 0.5) {
356+
// Light casts backwards as well.
357+
return AABB(Vector3(-1, -1, -1) * len, Vector3(2, 2, 2) * len);
358+
}
359+
360+
float size = Math::sin(angle) * len;
354361
return AABB(Vector3(-size, -size, -len), Vector3(size * 2, size * 2, len));
355362
};
356363
case RS::LIGHT_OMNI: {
@@ -807,6 +814,9 @@ bool LightStorage::reflection_probe_instance_begin_render(RID p_instance, RID p_
807814

808815
ERR_FAIL_NULL_V(atlas, false);
809816

817+
ERR_FAIL_COND_V_MSG(atlas->size < 4, false, "Attempted to render to a reflection atlas of invalid resolution.");
818+
ERR_FAIL_COND_V_MSG(atlas->count < 1, false, "Attempted to render to a reflection atlas of size < 1.");
819+
810820
ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
811821
ERR_FAIL_NULL_V(rpi, false);
812822

drivers/gles3/storage/material_storage.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2265,6 +2265,7 @@ void MaterialStorage::shader_set_code(RID p_shader, const String &p_code) {
22652265
}
22662266

22672267
if (shader->data) {
2268+
shader->data->set_path_hint(shader->path_hint);
22682269
shader->data->set_code(p_code);
22692270
}
22702271

drivers/gles3/storage/render_scene_buffers_gles3.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,10 @@ void RenderSceneBuffersGLES3::_check_render_buffers() {
200200
uint32_t depth_format_size = 4;
201201
bool use_multiview = view_count > 1;
202202

203+
if (!use_internal_buffer && internal3d.color != 0) {
204+
_clear_intermediate_buffers();
205+
}
206+
203207
if ((!use_internal_buffer || internal3d.color != 0) && (msaa3d.mode == RS::VIEWPORT_MSAA_DISABLED || msaa3d.color != 0)) {
204208
// already setup!
205209
return;

0 commit comments

Comments
 (0)