From 8253ccb9760181c0282be844405ad14275298ac7 Mon Sep 17 00:00:00 2001 From: Andy Leiserson Date: Wed, 13 Aug 2025 20:11:41 -0700 Subject: [PATCH 1/2] Tweak the bytes_per_row alignment check Previously, the check was skipped if the copy was a single row, which is not correct. The check should be made whenever bytes_per_row is specified. It is permissible not to specify bytes_per_row if the copy is a single row, but if it is specified, it must be aligned. Also removes a redundant check of the `offset` alignment. Since the offset and bytesPerRow alignment checks are not part of "validating linear texture data", I chose to remove that instance of them. These checks are now in `validate_texture_buffer_copy`, which does not correspond 1:1 with the spec, but has a comment explaining how it does correspond. --- CHANGELOG.md | 1 + cts_runner/test.lst | 2 ++ wgpu-core/src/command/transfer.rs | 50 ++++++++++++++----------------- wgpu-core/src/device/queue.rs | 5 ++-- 4 files changed, 27 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed056032c2f..6f7bbe139b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -93,6 +93,7 @@ This allows using precompiled shaders without manually checking which backend's - Copies within the same texture must not overlap. - Copies of multisampled or depth/stencil formats must span an entire subresource (layer). - Copies of depth/stencil formats must be 4B aligned. + - For texture-buffer copies, `bytes_per_row` on the buffer side must be 256B-aligned, even if the transfer is a single row. - The offset for `set_vertex_buffer` and `set_index_buffer` must be 4B aligned. By @andyleiserson in [#7929](https://github.com/gfx-rs/wgpu/pull/7929). - The offset and size of bindings are validated as fitting within the underlying buffer in more cases. By @andyleiserson in [#7911](https://github.com/gfx-rs/wgpu/pull/7911). - The function you pass to `Device::on_uncaptured_error()` must now implement `Sync` in addition to `Send`, and be wrapped in `Arc` instead of `Box`. diff --git a/cts_runner/test.lst b/cts_runner/test.lst index 082d348a2b7..996f02a4b53 100644 --- a/cts_runner/test.lst +++ b/cts_runner/test.lst @@ -68,6 +68,8 @@ fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_sten webgpu:api,validation,image_copy,buffer_texture_copies:sample_count:* webgpu:api,validation,image_copy,buffer_texture_copies:texture_buffer_usages:* webgpu:api,validation,image_copy,buffer_texture_copies:device_mismatch:* +fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyB2T";dimension="1d" +fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyT2B";dimension="1d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="2d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyT2B";dimension="2d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="3d" diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 3a818e148cb..e14fd8a89ea 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -283,7 +283,6 @@ pub(crate) fn validate_linear_texture_data( buffer_size: BufferAddress, buffer_side: CopySide, copy_size: &Extent3d, - need_copy_aligned_rows: bool, ) -> Result<(BufferAddress, BufferAddress), TransferError> { let wgt::BufferTextureCopyInfo { copy_width, @@ -292,7 +291,7 @@ pub(crate) fn validate_linear_texture_data( offset, - block_size_bytes, + block_size_bytes: _, block_width_texels, block_height_texels, @@ -333,24 +332,6 @@ pub(crate) fn validate_linear_texture_data( return Err(TransferError::UnspecifiedRowsPerImage); }; - if need_copy_aligned_rows { - let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress; - - let mut offset_alignment = block_size_bytes; - if format.is_depth_stencil_format() { - offset_alignment = 4 - } - if offset % offset_alignment != 0 { - return Err(TransferError::UnalignedBufferOffset(offset)); - } - - // The alignment of row_stride_bytes is only required if there are - // multiple rows - if requires_multiple_rows && row_stride_bytes % bytes_per_row_alignment != 0 { - return Err(TransferError::UnalignedBytesPerRow); - } - } - // Avoid underflow in the subtraction by checking bytes_in_copy against buffer_size first. if bytes_in_copy > buffer_size || offset > buffer_size - bytes_in_copy { return Err(TransferError::BufferOverrun { @@ -372,7 +353,15 @@ pub(crate) fn validate_linear_texture_data( /// * The copy must be from/to a single aspect of the texture. /// * If `aligned` is true, the buffer offset must be aligned appropriately. /// -/// The following steps in the algorithm are implemented elsewhere: +/// And implements the following check from WebGPU's [validating GPUTexelCopyBufferInfo][vtcbi] +/// algorithm: +/// * If `aligned` is true, `bytesPerRow` must be a multiple of 256. +/// +/// Note that the `bytesPerRow` alignment check is enforced whenever +/// `bytesPerRow` is specified, even if the transfer is not multiple rows and +/// `bytesPerRow` could have been omitted. +/// +/// The following steps in [validating texture buffer copy][vtbc] are implemented elsewhere: /// * Invocation of other validation algorithms. /// * The texture usage (COPY_DST / COPY_SRC) check. /// * The check for non-copyable depth/stencil formats. The caller must perform @@ -382,11 +371,12 @@ pub(crate) fn validate_linear_texture_data( /// non-copyable format. /// /// [vtbc]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-texture-buffer-copy +/// [vtcbi]: https://www.w3.org/TR/webgpu/#abstract-opdef-validating-gputexelcopybufferinfo pub(crate) fn validate_texture_buffer_copy( texture_copy_view: &wgt::TexelCopyTextureInfo, aspect: hal::FormatAspects, desc: &wgt::TextureDescriptor<(), Vec>, - offset: BufferAddress, + layout: &wgt::TexelCopyBufferLayout, aligned: bool, ) -> Result<(), TransferError> { if desc.sample_count != 1 { @@ -411,8 +401,14 @@ pub(crate) fn validate_texture_buffer_copy( .expect("non-copyable formats should have been rejected previously") }; - if aligned && offset % u64::from(offset_alignment) != 0 { - return Err(TransferError::UnalignedBufferOffset(offset)); + if aligned && layout.offset % u64::from(offset_alignment) != 0 { + return Err(TransferError::UnalignedBufferOffset(layout.offset)); + } + + if let Some(bytes_per_row) = layout.bytes_per_row { + if aligned && bytes_per_row % wgt::COPY_BYTES_PER_ROW_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBytesPerRow); + } } Ok(()) @@ -958,7 +954,7 @@ impl Global { destination, dst_base.aspect, &dst_texture.desc, - source.layout.offset, + &source.layout, true, // alignment required for buffer offset )?; @@ -970,7 +966,6 @@ impl Global { src_buffer.size, CopySide::Source, copy_size, - true, )?; if dst_texture.desc.format.is_depth_stencil_format() { @@ -1085,7 +1080,7 @@ impl Global { source, src_base.aspect, &src_texture.desc, - destination.layout.offset, + &destination.layout, true, // alignment required for buffer offset )?; @@ -1097,7 +1092,6 @@ impl Global { dst_buffer.size, CopySide::Destination, copy_size, - true, )?; if src_texture.desc.format.is_depth_stencil_format() { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index d946ec35a0b..d33855c16b2 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -759,8 +759,8 @@ impl Queue { &destination, dst_base.aspect, &dst.desc, - data_layout.offset, - false, // alignment not required for buffer offset + &data_layout, + false, // alignment not required for buffer offset or bytes per row )?; // Note: `_source_bytes_per_array_layer` is ignored since we @@ -772,7 +772,6 @@ impl Queue { data.len() as wgt::BufferAddress, CopySide::Source, size, - false, )?; if dst.desc.format.is_depth_stencil_format() { From 0a834d25564e5030607d385f98170fcf0427ffb8 Mon Sep 17 00:00:00 2001 From: Andy Leiserson Date: Wed, 13 Aug 2025 20:13:31 -0700 Subject: [PATCH 2/2] Fix buffer initialization tracking for some buffer-texture copies Fixes #7947 Fixes #8021 Fixes #8097 --- CHANGELOG.md | 1 + cts_runner/test.lst | 17 ++-- wgpu-core/src/command/transfer.rs | 134 +++++++++++++++++++++++++----- wgpu-core/src/device/queue.rs | 19 +++-- 4 files changed, 132 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f7bbe139b8..8d199846183 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -100,6 +100,7 @@ This allows using precompiled shaders without manually checking which backend's In exchange for this, it is no longer possible for calling `wgpu` functions while in that callback to cause a deadlock (not that we encourage you to actually do that). By @kpreid in [#8011](https://github.com/gfx-rs/wgpu/pull/8011). - The limits requested for a device must now satisfy `min_subgroup_size <= max_subgroup_size`. By @andyleiserson in [#8085](https://github.com/gfx-rs/wgpu/pull/8085). +- Track the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099). #### Naga diff --git a/cts_runner/test.lst b/cts_runner/test.lst index 996f02a4b53..8a98c345125 100644 --- a/cts_runner/test.lst +++ b/cts_runner/test.lst @@ -13,12 +13,9 @@ webgpu:api,operation,compute,basic:memcpy:* //FAIL: webgpu:api,operation,compute,basic:large_dispatch:* webgpu:api,operation,compute_pipeline,overrides:* webgpu:api,operation,device,lost:* -// This is all the storeOp tests, but some of them fail if run in a single invocation. -// https://github.com/gfx-rs/wgpu/issues/8021 -webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_with_depth_stencil_attachment:* -webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_only:* -webgpu:api,operation,render_pass,storeOp:render_pass_store_op,multiple_color_attachments:* -webgpu:api,operation,render_pass,storeOp:render_pass_store_op,depth_stencil_attachment_only:* +webgpu:api,operation,render_pass,storeOp:* +// Presumably vertex pulling, revisit after https://github.com/gfx-rs/wgpu/issues/7981 is fixed. +fails-if(metal) webgpu:api,operation,vertex_state,correctness:setVertexBuffer_offset_and_attribute_offset:* fails-if(dx12) webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,* webgpu:api,validation,createBindGroup:buffer,effective_buffer_binding_size:* webgpu:api,validation,encoding,beginComputePass:* @@ -60,14 +57,18 @@ webgpu:api,validation,encoding,programmable,pipeline_bind_group_compat:sampler_b webgpu:api,validation,encoding,queries,general:occlusion_query,query_index:* webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_usage_and_aspect:* webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_size:* +fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyB2T" +fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyT2B" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth32float";aspect="depth-only";copyType="CopyT2B" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyB2T" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyT2B" //mix of PASS and FAIL: other subtests of copy_buffer_offset. Related bugs: -// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947 +// https://github.com/gfx-rs/wgpu/issues/7946 webgpu:api,validation,image_copy,buffer_texture_copies:sample_count:* webgpu:api,validation,image_copy,buffer_texture_copies:texture_buffer_usages:* webgpu:api,validation,image_copy,buffer_texture_copies:device_mismatch:* +fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyB2T";dimension="1d" +fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyT2B";dimension="1d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyB2T";dimension="1d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyT2B";dimension="1d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="2d" @@ -81,7 +82,7 @@ fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyT2B";dimension="3d" fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyB2T";dimension="3d" //mix of PASS and FAIL: other subtests of offset_and_bytesPerRow. Related bugs: -// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947 +// https://github.com/gfx-rs/wgpu/issues/7946 webgpu:api,validation,image_copy,layout_related:copy_end_overflows_u64:* // Fails with OOM in CI. fails-if(dx12) webgpu:api,validation,image_copy,layout_related:offset_alignment:* diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index e14fd8a89ea..2d198904c35 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -22,7 +22,7 @@ use crate::{ TextureInitTrackerAction, }, resource::{ - MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess, + Buffer, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess, Texture, TextureErrorDimension, }, snatch::SnatchGuard, @@ -34,7 +34,7 @@ pub type TexelCopyBufferInfo = wgt::TexelCopyBufferInfo; pub type TexelCopyTextureInfo = wgt::TexelCopyTextureInfo; pub type CopyExternalImageDestInfo = wgt::CopyExternalImageDestInfo; -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum CopySide { Source, Destination, @@ -271,9 +271,11 @@ pub(crate) fn extract_texture_selector( /// /// Copied with some modifications from WebGPU standard. /// -/// If successful, returns a pair `(bytes, stride)`, where: +/// If successful, returns a tuple `(bytes, stride, is_contiguous)`, where: /// - `bytes` is the number of buffer bytes required for this copy, and /// - `stride` number of bytes between array layers. +/// - `is_contiguous` is true if the linear texture data does not have padding +/// between rows or between images. /// /// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data pub(crate) fn validate_linear_texture_data( @@ -283,7 +285,7 @@ pub(crate) fn validate_linear_texture_data( buffer_size: BufferAddress, buffer_side: CopySide, copy_size: &Extent3d, -) -> Result<(BufferAddress, BufferAddress), TransferError> { +) -> Result<(BufferAddress, BufferAddress, bool), TransferError> { let wgt::BufferTextureCopyInfo { copy_width, copy_height, @@ -298,14 +300,14 @@ pub(crate) fn validate_linear_texture_data( width_blocks: _, height_blocks, - row_bytes_dense: _, + row_bytes_dense, row_stride_bytes, image_stride_rows: _, image_stride_bytes, image_rows_dense: _, - image_bytes_dense: _, + image_bytes_dense, bytes_in_copy, } = layout.get_buffer_texture_copy_info(format, aspect, copy_size)?; @@ -342,7 +344,10 @@ pub(crate) fn validate_linear_texture_data( }); } - Ok((bytes_in_copy, image_stride_bytes)) + let is_contiguous = (row_stride_bytes == row_bytes_dense || !requires_multiple_rows) + && (image_stride_bytes == image_bytes_dense || !requires_multiple_images); + + Ok((bytes_in_copy, image_stride_bytes, is_contiguous)) } /// Validation for texture/buffer copies. @@ -680,6 +685,90 @@ fn handle_dst_texture_init( Ok(()) } +/// Handle initialization tracking for a transfer's source or destination buffer. +/// +/// Ensures that the transfer will not read from uninitialized memory, and updates +/// the initialization state information to reflect the transfer. +fn handle_buffer_init( + cmd_buf_data: &mut CommandBufferMutable, + info: &TexelCopyBufferInfo, + buffer: &Arc, + direction: CopySide, + required_buffer_bytes_in_copy: BufferAddress, + is_contiguous: bool, +) { + const ALIGN_SIZE: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT; + const ALIGN_MASK: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT - 1; + + let start = info.layout.offset; + let end = info.layout.offset + required_buffer_bytes_in_copy; + if !is_contiguous || direction == CopySide::Source { + // If the transfer will read the buffer, then the whole region needs to + // be initialized. + // + // If the transfer will not write a contiguous region of the buffer, + // then we need to make sure the padding areas are initialized. For now, + // initialize the whole region, although this could be improved to + // initialize only the necessary parts if doing so is likely to be + // faster than initializing the whole thing. + // + // Adjust the start/end outwards to 4B alignment. + let aligned_start = start & !ALIGN_MASK; + let aligned_end = (end + ALIGN_MASK) & !ALIGN_MASK; + cmd_buf_data.buffer_memory_init_actions.extend( + buffer.initialization_status.read().create_action( + buffer, + aligned_start..aligned_end, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + } else { + // If the transfer will write a contiguous region of the buffer, then we + // don't need to initialize that region. + // + // However, if the start and end are not 4B aligned, we need to make + // sure that we don't end up trying to initialize non-4B-aligned regions + // later. + // + // Adjust the start/end inwards to 4B alignment, we will handle the + // first/last pieces differently. + let aligned_start = (start + ALIGN_MASK) & !ALIGN_MASK; + let aligned_end = end & !ALIGN_MASK; + if aligned_start != start { + cmd_buf_data.buffer_memory_init_actions.extend( + buffer.initialization_status.read().create_action( + buffer, + aligned_start - ALIGN_SIZE..aligned_start, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + } + if aligned_start != aligned_end { + cmd_buf_data.buffer_memory_init_actions.extend( + buffer.initialization_status.read().create_action( + buffer, + aligned_start..aligned_end, + MemoryInitKind::ImplicitlyInitialized, + ), + ); + } + if aligned_end != end { + // It is possible that `aligned_end + ALIGN_SIZE > dst_buffer.size`, + // because `dst_buffer.size` is the user-requested size, not the + // final size of the buffer. The final size of the buffer is not + // readily available, but was rounded up to COPY_BUFFER_ALIGNMENT, + // so no overrun is possible. + cmd_buf_data.buffer_memory_init_actions.extend( + buffer.initialization_status.read().create_action( + buffer, + aligned_end..aligned_end + ALIGN_SIZE, + MemoryInitKind::NeedsInitializedMemory, + ), + ); + } + } +} + impl Global { pub fn command_encoder_copy_buffer_to_buffer( &self, @@ -958,7 +1047,7 @@ impl Global { true, // alignment required for buffer offset )?; - let (required_buffer_bytes_in_copy, bytes_per_array_layer) = + let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) = validate_linear_texture_data( &source.layout, dst_texture.desc.format, @@ -974,12 +1063,13 @@ impl Global { .map_err(TransferError::from)?; } - cmd_buf_data.buffer_memory_init_actions.extend( - src_buffer.initialization_status.read().create_action( - &src_buffer, - source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), - MemoryInitKind::NeedsInitializedMemory, - ), + handle_buffer_init( + cmd_buf_data, + source, + &src_buffer, + CopySide::Source, + required_buffer_bytes_in_copy, + is_contiguous, ); let regions = (0..array_layer_count) @@ -1084,7 +1174,7 @@ impl Global { true, // alignment required for buffer offset )?; - let (required_buffer_bytes_in_copy, bytes_per_array_layer) = + let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) = validate_linear_texture_data( &destination.layout, src_texture.desc.format, @@ -1140,13 +1230,13 @@ impl Global { let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard)); - cmd_buf_data.buffer_memory_init_actions.extend( - dst_buffer.initialization_status.read().create_action( - &dst_buffer, - destination.layout.offset - ..(destination.layout.offset + required_buffer_bytes_in_copy), - MemoryInitKind::ImplicitlyInitialized, - ), + handle_buffer_init( + cmd_buf_data, + destination, + &dst_buffer, + CopySide::Destination, + required_buffer_bytes_in_copy, + is_contiguous, ); let regions = (0..array_layer_count) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index d33855c16b2..0d55735fdde 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -759,20 +759,21 @@ impl Queue { &destination, dst_base.aspect, &dst.desc, - &data_layout, + data_layout, false, // alignment not required for buffer offset or bytes per row )?; // Note: `_source_bytes_per_array_layer` is ignored since we // have a staging copy, and it can have a different value. - let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data( - data_layout, - dst.desc.format, - destination.aspect, - data.len() as wgt::BufferAddress, - CopySide::Source, - size, - )?; + let (required_bytes_in_copy, _source_bytes_per_array_layer, _) = + validate_linear_texture_data( + data_layout, + dst.desc.format, + destination.aspect, + data.len() as wgt::BufferAddress, + CopySide::Source, + size, + )?; if dst.desc.format.is_depth_stencil_format() { self.device