Skip to content

DX12 Copy Stuff #7706

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 9 commits into
base: trunk
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion tests/tests/wgpu-gpu/regression/issue_6827.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
// Unfortunately, this depends on whether `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
// is true, which we have no way to encode. It reproduces in CI, though, so this is not a big concern.
FailureCase::backend(wgpu::Backends::DX12)
.flaky()
.validation_error(
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
)
Expand Down
93 changes: 42 additions & 51 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,96 +235,87 @@ pub(crate) fn validate_linear_texture_data(
copy_size: &Extent3d,
need_copy_aligned_rows: bool,
) -> Result<(BufferAddress, BufferAddress), TransferError> {
// Convert all inputs to BufferAddress (u64) to avoid some of the overflow issues
// Note: u64 is not always enough to prevent overflow, especially when multiplying
// something with a potentially large depth value, so it is preferable to validate
// the copy size before calling this function (for example via `validate_texture_copy_range`).
let copy_width = copy_size.width as BufferAddress;
let copy_height = copy_size.height as BufferAddress;
let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress;

let offset = layout.offset;

let block_size = format.block_copy_size(Some(aspect)).unwrap() as BufferAddress;
let (block_width, block_height) = format.block_dimensions();
let block_width = block_width as BufferAddress;
let block_height = block_height as BufferAddress;

if copy_width % block_width != 0 {
let wgt::BufferTextureCopyInfo {
copy_width,
copy_height,
depth_or_array_layers,

offset,

block_size_bytes,
block_width_texels,
block_height_texels,

width_blocks: _,
height_blocks,

row_bytes_dense,
row_stride_bytes,

image_stride_rows: _,
image_stride_bytes,

image_rows_dense: _,
image_bytes_dense: _,

bytes_in_copy,
} = layout.get_buffer_texture_copy_info(format, aspect, copy_size);

if copy_width % block_width_texels != 0 {
return Err(TransferError::UnalignedCopyWidth);
}
if copy_height % block_height != 0 {
if copy_height % block_height_texels != 0 {
return Err(TransferError::UnalignedCopyHeight);
}

let width_in_blocks = copy_width / block_width;
let height_in_blocks = copy_height / block_height;

let bytes_in_last_row = width_in_blocks * block_size;

let bytes_per_row = if let Some(bytes_per_row) = layout.bytes_per_row {
let bytes_per_row = bytes_per_row as BufferAddress;
if bytes_per_row < bytes_in_last_row {
if let Some(raw_bytes_per_row) = layout.bytes_per_row {
let raw_bytes_per_row = raw_bytes_per_row as BufferAddress;
if raw_bytes_per_row < row_bytes_dense {
return Err(TransferError::InvalidBytesPerRow);
}
bytes_per_row
} else {
if depth_or_array_layers > 1 || height_in_blocks > 1 {
if depth_or_array_layers > 1 || height_blocks > 1 {
return Err(TransferError::UnspecifiedBytesPerRow);
}
0
};
let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
let rows_per_image = rows_per_image as BufferAddress;
if rows_per_image < height_in_blocks {
}

if let Some(raw_rows_per_image) = layout.rows_per_image {
let raw_rows_per_image = raw_rows_per_image as BufferAddress;
if raw_rows_per_image < height_blocks {
return Err(TransferError::InvalidRowsPerImage);
}
rows_per_image
} else {
if depth_or_array_layers > 1 {
return Err(TransferError::UnspecifiedRowsPerImage);
}
0
};

if need_copy_aligned_rows {
let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress;

let mut offset_alignment = block_size;
let mut offset_alignment = block_size_bytes;
if format.is_depth_stencil_format() {
offset_alignment = 4
}
if offset % offset_alignment != 0 {
return Err(TransferError::UnalignedBufferOffset(offset));
}

if bytes_per_row % bytes_per_row_alignment != 0 {
if row_stride_bytes % bytes_per_row_alignment != 0 {
return Err(TransferError::UnalignedBytesPerRow);
}
}

let bytes_per_image = bytes_per_row * rows_per_image;

let required_bytes_in_copy = if depth_or_array_layers == 0 {
0
} else {
let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1);
if height_in_blocks > 0 {
required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row;
}
required_bytes_in_copy
};

if offset + required_bytes_in_copy > buffer_size {
if offset + bytes_in_copy > buffer_size {
return Err(TransferError::BufferOverrun {
start_offset: offset,
end_offset: offset + required_bytes_in_copy,
end_offset: offset + bytes_in_copy,
buffer_size,
side: buffer_side,
});
}

Ok((required_bytes_in_copy, bytes_per_image))
Ok((bytes_in_copy, image_stride_bytes))
}

/// WebGPU's [validating texture copy range][vtcr] algorithm.
Expand Down
17 changes: 17 additions & 0 deletions wgpu-hal/src/dx12/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,22 @@ impl super::Adapter {
.is_ok()
};

let unrestricted_buffer_texture_copy_pitch = {
let mut features13 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS13::default();
let res = unsafe {
device.CheckFeatureSupport(
Direct3D12::D3D12_FEATURE_D3D12_OPTIONS13,
<*mut _>::cast(&mut features13),
size_of_val(&features13) as u32,
)
}
.is_ok();

res && features13
.UnrestrictedBufferTextureCopyPitchSupported
.as_bool()
};

let mut max_sampler_descriptor_heap_size =
Direct3D12::D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE;
{
Expand Down Expand Up @@ -299,6 +315,7 @@ impl super::Adapter {
suballocation_supported: !info.name.contains("Iris(R) Xe"),
shader_model,
max_sampler_descriptor_heap_size,
unrestricted_buffer_texture_copy_pitch,
};

// Theoretically vram limited, but in practice 2^20 is the limit
Expand Down
26 changes: 26 additions & 0 deletions wgpu-hal/src/dx12/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,19 @@ impl crate::CommandEncoder for super::CommandEncoder {
},
};

{
let offset = unsafe { src_location.Anonymous.PlacedFootprint.Offset };
let remainder = offset % Direct3D12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64;

assert_eq!(
remainder,
0,
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512. Was {}. Remainder is {}",
offset,
remainder
);
};

let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
unsafe {
list.CopyTextureRegion(
Expand Down Expand Up @@ -661,6 +674,19 @@ impl crate::CommandEncoder for super::CommandEncoder {
},
};

{
let offset = unsafe { dst_location.Anonymous.PlacedFootprint.Offset };
let remainder = offset % Direct3D12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64;

assert_eq!(
remainder,
0,
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512. Was {}. Remainder is {}",
offset,
remainder
);
};

let src_box = make_box(&r.texture_base.origin, &r.size);
unsafe {
list.CopyTextureRegion(&dst_location, 0, 0, 0, &src_location, Some(&src_box))
Expand Down
16 changes: 16 additions & 0 deletions wgpu-hal/src/dx12/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ mod instance;
mod sampler;
mod shader_compilation;
mod suballocation;
mod texture_copies;
mod types;
mod view;

Expand Down Expand Up @@ -573,6 +574,7 @@ struct PrivateCapabilities {
suballocation_supported: bool,
shader_model: naga::back::hlsl::ShaderModel,
max_sampler_descriptor_heap_size: u32,
unrestricted_buffer_texture_copy_pitch: bool,
}

#[derive(Default)]
Expand Down Expand Up @@ -841,6 +843,20 @@ pub struct Buffer {
allocation: suballocation::Allocation,
}

/// Two `Buffer`s compare equal when their `resource` fields report the same
/// raw interface pointer; no other field takes part in the comparison.
///
/// The comparison deliberately goes through `as_raw()`, the same value the
/// `Hash` impl for `Buffer` feeds to the hasher, so that `a == b` always
/// implies `hash(a) == hash(b)`, as `std::hash::Hash` requires.
impl PartialEq for Buffer {
    fn eq(&self, other: &Self) -> bool {
        // NOTE(review): `==` on the interface wrapper itself is not guaranteed
        // to agree with raw-pointer hashing (it may fall back to a COM identity
        // query) — confirm against the windows-core version in use.
        self.resource.as_raw() == other.resource.as_raw()
    }
}

// Marker impl: declares that the `PartialEq` impl for `Buffer` is a full
// equivalence relation, which hashed collections require of their keys.
impl Eq for Buffer {}

/// Hashes a `Buffer` using only the value returned by
/// `self.resource.as_raw()`; no other field contributes to the hash.
impl std::hash::Hash for Buffer {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let raw = self.resource.as_raw();
        std::hash::Hash::hash(&raw, state);
    }
}

unsafe impl Send for Buffer {}
unsafe impl Sync for Buffer {}

Expand Down
Loading