Skip to content

Fix initialization tracking for the destination of copyTextureToBuffer #8099

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,14 @@ This allows using precompiled shaders without manually checking which backend's
- Copies within the same texture must not overlap.
- Copies of multisampled or depth/stencil formats must span an entire subresource (layer).
- Copies of depth/stencil formats must be 4B aligned.
- For texture-buffer copies, `bytes_per_row` on the buffer side must be 256B-aligned, even if the transfer is a single row.
- The offset for `set_vertex_buffer` and `set_index_buffer` must be 4B aligned. By @andyleiserson in [#7929](https://github.com/gfx-rs/wgpu/pull/7929).
- The offset and size of bindings are validated as fitting within the underlying buffer in more cases. By @andyleiserson in [#7911](https://github.com/gfx-rs/wgpu/pull/7911).
- The function you pass to `Device::on_uncaptured_error()` must now implement `Sync` in addition to `Send`, and be wrapped in `Arc` instead of `Box`.
In exchange for this, it is no longer possible for calling `wgpu` functions while in that callback to cause a deadlock (not that we encourage you to actually do that).
By @kpreid in [#8011](https://github.com/gfx-rs/wgpu/pull/8011).
- The limits requested for a device must now satisfy `min_subgroup_size <= max_subgroup_size`. By @andyleiserson in [#8085](https://github.com/gfx-rs/wgpu/pull/8085).
- Track the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099).

#### Naga

Expand Down
19 changes: 11 additions & 8 deletions cts_runner/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,9 @@ webgpu:api,operation,compute,basic:memcpy:*
//FAIL: webgpu:api,operation,compute,basic:large_dispatch:*
webgpu:api,operation,compute_pipeline,overrides:*
webgpu:api,operation,device,lost:*
// This is all the storeOp tests, but some of them fail if run in a single invocation.
// https://github.com/gfx-rs/wgpu/issues/8021
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_with_depth_stencil_attachment:*
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_only:*
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,multiple_color_attachments:*
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,depth_stencil_attachment_only:*
webgpu:api,operation,render_pass,storeOp:*
// Presumably vertex pulling, revisit after https://github.com/gfx-rs/wgpu/issues/7981 is fixed.
fails-if(metal) webgpu:api,operation,vertex_state,correctness:setVertexBuffer_offset_and_attribute_offset:*
fails-if(dx12) webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,*
webgpu:api,validation,createBindGroup:buffer,effective_buffer_binding_size:*
webgpu:api,validation,encoding,beginComputePass:*
Expand Down Expand Up @@ -60,14 +57,20 @@ webgpu:api,validation,encoding,programmable,pipeline_bind_group_compat:sampler_b
webgpu:api,validation,encoding,queries,general:occlusion_query,query_index:*
webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_usage_and_aspect:*
webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_size:*
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyB2T"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyT2B"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth32float";aspect="depth-only";copyType="CopyT2B"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyB2T"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyT2B"
//mix of PASS and FAIL: other subtests of copy_buffer_offset. Related bugs:
// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947
// https://github.com/gfx-rs/wgpu/issues/7946
webgpu:api,validation,image_copy,buffer_texture_copies:sample_count:*
webgpu:api,validation,image_copy,buffer_texture_copies:texture_buffer_usages:*
webgpu:api,validation,image_copy,buffer_texture_copies:device_mismatch:*
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyB2T";dimension="1d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyT2B";dimension="1d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyB2T";dimension="1d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyT2B";dimension="1d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="2d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyT2B";dimension="2d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="3d"
Expand All @@ -79,7 +82,7 @@ fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyT2B";dimension="3d"
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyB2T";dimension="3d"
//mix of PASS and FAIL: other subtests of offset_and_bytesPerRow. Related bugs:
// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947
// https://github.com/gfx-rs/wgpu/issues/7946
webgpu:api,validation,image_copy,layout_related:copy_end_overflows_u64:*
// Fails with OOM in CI.
fails-if(dx12) webgpu:api,validation,image_copy,layout_related:offset_alignment:*
Expand Down
184 changes: 134 additions & 50 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ use crate::{
TextureInitTrackerAction,
},
resource::{
MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess,
Buffer, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess,
Texture, TextureErrorDimension,
},
snatch::SnatchGuard,
Expand All @@ -34,7 +34,7 @@ pub type TexelCopyBufferInfo = wgt::TexelCopyBufferInfo<BufferId>;
pub type TexelCopyTextureInfo = wgt::TexelCopyTextureInfo<TextureId>;
pub type CopyExternalImageDestInfo = wgt::CopyExternalImageDestInfo<TextureId>;

#[derive(Clone, Copy, Debug)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CopySide {
Source,
Destination,
Expand Down Expand Up @@ -271,9 +271,11 @@ pub(crate) fn extract_texture_selector<T>(
///
/// Copied with some modifications from WebGPU standard.
///
/// If successful, returns a pair `(bytes, stride)`, where:
/// If successful, returns a tuple `(bytes, stride, is_contiguous)`, where:
/// - `bytes` is the number of buffer bytes required for this copy, and
/// - `stride` number of bytes between array layers.
/// - `is_contiguous` is true if the linear texture data does not have padding
/// between rows or between images.
///
/// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data
pub(crate) fn validate_linear_texture_data(
Expand All @@ -283,30 +285,29 @@ pub(crate) fn validate_linear_texture_data(
buffer_size: BufferAddress,
buffer_side: CopySide,
copy_size: &Extent3d,
need_copy_aligned_rows: bool,
) -> Result<(BufferAddress, BufferAddress), TransferError> {
) -> Result<(BufferAddress, BufferAddress, bool), TransferError> {
let wgt::BufferTextureCopyInfo {
copy_width,
copy_height,
depth_or_array_layers,

offset,

block_size_bytes,
block_size_bytes: _,
block_width_texels,
block_height_texels,

width_blocks: _,
height_blocks,

row_bytes_dense: _,
row_bytes_dense,
row_stride_bytes,

image_stride_rows: _,
image_stride_bytes,

image_rows_dense: _,
image_bytes_dense: _,
image_bytes_dense,

bytes_in_copy,
} = layout.get_buffer_texture_copy_info(format, aspect, copy_size)?;
Expand All @@ -333,24 +334,6 @@ pub(crate) fn validate_linear_texture_data(
return Err(TransferError::UnspecifiedRowsPerImage);
};

if need_copy_aligned_rows {
let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress;

let mut offset_alignment = block_size_bytes;
if format.is_depth_stencil_format() {
offset_alignment = 4
}
if offset % offset_alignment != 0 {
return Err(TransferError::UnalignedBufferOffset(offset));
}

// The alignment of row_stride_bytes is only required if there are
// multiple rows
if requires_multiple_rows && row_stride_bytes % bytes_per_row_alignment != 0 {
return Err(TransferError::UnalignedBytesPerRow);
}
}

// Avoid underflow in the subtraction by checking bytes_in_copy against buffer_size first.
if bytes_in_copy > buffer_size || offset > buffer_size - bytes_in_copy {
return Err(TransferError::BufferOverrun {
Expand All @@ -361,7 +344,10 @@ pub(crate) fn validate_linear_texture_data(
});
}

Ok((bytes_in_copy, image_stride_bytes))
let is_contiguous = (row_stride_bytes == row_bytes_dense || !requires_multiple_rows)
&& (image_stride_bytes == image_bytes_dense || !requires_multiple_images);

Ok((bytes_in_copy, image_stride_bytes, is_contiguous))
}

/// Validation for texture/buffer copies.
Expand All @@ -372,7 +358,15 @@ pub(crate) fn validate_linear_texture_data(
/// * The copy must be from/to a single aspect of the texture.
/// * If `aligned` is true, the buffer offset must be aligned appropriately.
///
/// The following steps in the algorithm are implemented elsewhere:
/// And implements the following check from WebGPU's [validating GPUTexelCopyBufferInfo][vtcbi]
/// algorithm:
/// * If `aligned` is true, `bytesPerRow` must be a multiple of 256.
///
/// Note that the `bytesPerRow` alignment check is enforced whenever
/// `bytesPerRow` is specified, even if the transfer is not multiple rows and
/// `bytesPerRow` could have been omitted.
///
/// The following steps in [validating texture buffer copy][vtbc] are implemented elsewhere:
/// * Invocation of other validation algorithms.
/// * The texture usage (COPY_DST / COPY_SRC) check.
/// * The check for non-copyable depth/stencil formats. The caller must perform
Expand All @@ -382,11 +376,12 @@ pub(crate) fn validate_linear_texture_data(
/// non-copyable format.
///
/// [vtbc]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-texture-buffer-copy
/// [vtcbi]: https://www.w3.org/TR/webgpu/#abstract-opdef-validating-gputexelcopybufferinfo
pub(crate) fn validate_texture_buffer_copy<T>(
texture_copy_view: &wgt::TexelCopyTextureInfo<T>,
aspect: hal::FormatAspects,
desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
offset: BufferAddress,
layout: &wgt::TexelCopyBufferLayout,
aligned: bool,
) -> Result<(), TransferError> {
if desc.sample_count != 1 {
Expand All @@ -411,8 +406,14 @@ pub(crate) fn validate_texture_buffer_copy<T>(
.expect("non-copyable formats should have been rejected previously")
};

if aligned && offset % u64::from(offset_alignment) != 0 {
return Err(TransferError::UnalignedBufferOffset(offset));
if aligned && layout.offset % u64::from(offset_alignment) != 0 {
return Err(TransferError::UnalignedBufferOffset(layout.offset));
}

if let Some(bytes_per_row) = layout.bytes_per_row {
if aligned && bytes_per_row % wgt::COPY_BYTES_PER_ROW_ALIGNMENT != 0 {
return Err(TransferError::UnalignedBytesPerRow);
}
}

Ok(())
Expand Down Expand Up @@ -684,6 +685,90 @@ fn handle_dst_texture_init(
Ok(())
}

/// Handle initialization tracking for a transfer's source or destination buffer.
///
/// Ensures that the transfer will not read from uninitialized memory, and updates
/// the initialization state information to reflect the transfer.
fn handle_buffer_init(
cmd_buf_data: &mut CommandBufferMutable,
info: &TexelCopyBufferInfo,
buffer: &Arc<Buffer>,
direction: CopySide,
required_buffer_bytes_in_copy: BufferAddress,
is_contiguous: bool,
) {
const ALIGN_SIZE: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT;
const ALIGN_MASK: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT - 1;

let start = info.layout.offset;
let end = info.layout.offset + required_buffer_bytes_in_copy;
if !is_contiguous || direction == CopySide::Source {
// If the transfer will read the buffer, then the whole region needs to
// be initialized.
//
// If the transfer will not write a contiguous region of the buffer,
// then we need to make sure the padding areas are initialized. For now,
// initialize the whole region, although this could be improved to
// initialize only the necessary parts if doing so is likely to be
// faster than initializing the whole thing.
//
// Adjust the start/end outwards to 4B alignment.
let aligned_start = start & !ALIGN_MASK;
let aligned_end = (end + ALIGN_MASK) & !ALIGN_MASK;
cmd_buf_data.buffer_memory_init_actions.extend(
buffer.initialization_status.read().create_action(
buffer,
aligned_start..aligned_end,
MemoryInitKind::NeedsInitializedMemory,
),
);
} else {
// If the transfer will write a contiguous region of the buffer, then we
// don't need to initialize that region.
//
// However, if the start and end are not 4B aligned, we need to make
// sure that we don't end up trying to initialize non-4B-aligned regions
// later.
//
// Adjust the start/end inwards to 4B alignment, we will handle the
// first/last pieces differently.
let aligned_start = (start + ALIGN_MASK) & !ALIGN_MASK;
let aligned_end = end & !ALIGN_MASK;
if aligned_start != start {
cmd_buf_data.buffer_memory_init_actions.extend(
buffer.initialization_status.read().create_action(
buffer,
aligned_start - ALIGN_SIZE..aligned_start,
MemoryInitKind::NeedsInitializedMemory,
),
);
}
if aligned_start != aligned_end {
cmd_buf_data.buffer_memory_init_actions.extend(
buffer.initialization_status.read().create_action(
buffer,
aligned_start..aligned_end,
MemoryInitKind::ImplicitlyInitialized,
),
);
}
if aligned_end != end {
// It is possible that `aligned_end + ALIGN_SIZE > dst_buffer.size`,
// because `dst_buffer.size` is the user-requested size, not the
// final size of the buffer. The final size of the buffer is not
// readily available, but was rounded up to COPY_BUFFER_ALIGNMENT,
// so no overrun is possible.
cmd_buf_data.buffer_memory_init_actions.extend(
buffer.initialization_status.read().create_action(
buffer,
aligned_end..aligned_end + ALIGN_SIZE,
MemoryInitKind::NeedsInitializedMemory,
),
);
}
}
}

impl Global {
pub fn command_encoder_copy_buffer_to_buffer(
&self,
Expand Down Expand Up @@ -958,19 +1043,18 @@ impl Global {
destination,
dst_base.aspect,
&dst_texture.desc,
source.layout.offset,
&source.layout,
true, // alignment required for buffer offset
)?;

let (required_buffer_bytes_in_copy, bytes_per_array_layer) =
let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) =
validate_linear_texture_data(
&source.layout,
dst_texture.desc.format,
destination.aspect,
src_buffer.size,
CopySide::Source,
copy_size,
true,
)?;

if dst_texture.desc.format.is_depth_stencil_format() {
Expand All @@ -979,12 +1063,13 @@ impl Global {
.map_err(TransferError::from)?;
}

cmd_buf_data.buffer_memory_init_actions.extend(
src_buffer.initialization_status.read().create_action(
&src_buffer,
source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
MemoryInitKind::NeedsInitializedMemory,
),
handle_buffer_init(
cmd_buf_data,
source,
&src_buffer,
CopySide::Source,
required_buffer_bytes_in_copy,
is_contiguous,
);

let regions = (0..array_layer_count)
Expand Down Expand Up @@ -1085,19 +1170,18 @@ impl Global {
source,
src_base.aspect,
&src_texture.desc,
destination.layout.offset,
&destination.layout,
true, // alignment required for buffer offset
)?;

let (required_buffer_bytes_in_copy, bytes_per_array_layer) =
let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) =
validate_linear_texture_data(
&destination.layout,
src_texture.desc.format,
source.aspect,
dst_buffer.size,
CopySide::Destination,
copy_size,
true,
)?;

if src_texture.desc.format.is_depth_stencil_format() {
Expand Down Expand Up @@ -1146,13 +1230,13 @@ impl Global {
let dst_barrier =
dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));

cmd_buf_data.buffer_memory_init_actions.extend(
dst_buffer.initialization_status.read().create_action(
&dst_buffer,
destination.layout.offset
..(destination.layout.offset + required_buffer_bytes_in_copy),
MemoryInitKind::ImplicitlyInitialized,
),
handle_buffer_init(
cmd_buf_data,
destination,
&dst_buffer,
CopySide::Destination,
required_buffer_bytes_in_copy,
is_contiguous,
);

let regions = (0..array_layer_count)
Expand Down
Loading
Loading