Skip to content

Commit 7aabc45

Browse files
committed
Fix buffer initialization tracking for some buffer-texture copies
Fixes #7947 Fixes #8021 Fixes #8097
1 parent 1bafb3a commit 7aabc45

File tree

4 files changed

+132
-39
lines changed

4 files changed

+132
-39
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913).
7676
- The function you pass to `Device::on_uncaptured_error()` must now implement `Sync` in addition to `Send`, and be wrapped in `Arc` instead of `Box`.
7777
In exchange for this, it is no longer possible for calling `wgpu` functions while in that callback to cause a deadlock (not that we encourage you to actually do that).
7878
By @kpreid in [#8011](https://github.com/gfx-rs/wgpu/pull/8011).
79+
- `wgpu` now tracks the initialization status of buffer memory correctly when `copy_texture_to_buffer` skips over padding space between rows or layers, or when the start/end of a texture-buffer transfer is not 4B aligned. By @andyleiserson in [#8099](https://github.com/gfx-rs/wgpu/pull/8099).
7980

8081
#### Naga
8182

cts_runner/test.lst

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,9 @@ webgpu:api,operation,compute,basic:memcpy:*
1313
//FAIL: webgpu:api,operation,compute,basic:large_dispatch:*
1414
webgpu:api,operation,compute_pipeline,overrides:*
1515
webgpu:api,operation,device,lost:*
16-
// This is all the storeOp tests, but some of them fail if run in a single invocation.
17-
// https://github.com/gfx-rs/wgpu/issues/8021
18-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_with_depth_stencil_attachment:*
19-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,color_attachment_only:*
20-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,multiple_color_attachments:*
21-
webgpu:api,operation,render_pass,storeOp:render_pass_store_op,depth_stencil_attachment_only:*
16+
webgpu:api,operation,render_pass,storeOp:*
17+
// Presumably vertex pulling, revisit after https://github.com/gfx-rs/wgpu/issues/7981 is fixed.
18+
fails-if(metal) webgpu:api,operation,vertex_state,correctness:setVertexBuffer_offset_and_attribute_offset:*
2219
webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,at_over:limitTest="overMaximum";*
2320
webgpu:api,validation,createBindGroup:buffer,effective_buffer_binding_size:*
2421
webgpu:api,validation,encoding,beginComputePass:*
@@ -60,14 +57,18 @@ webgpu:api,validation,encoding,programmable,pipeline_bind_group_compat:sampler_b
6057
webgpu:api,validation,encoding,queries,general:occlusion_query,query_index:*
6158
webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_usage_and_aspect:*
6259
webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_size:*
60+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyB2T"
61+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="stencil8";aspect="stencil-only";copyType="CopyT2B"
6362
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth32float";aspect="depth-only";copyType="CopyT2B"
6463
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyB2T"
6564
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:depth_stencil_format,copy_buffer_offset:format="depth24plus-stencil8";aspect="stencil-only";copyType="CopyT2B"
6665
//mix of PASS and FAIL: other subtests of copy_buffer_offset. Related bugs:
67-
// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947
66+
// https://github.com/gfx-rs/wgpu/issues/7946
6867
webgpu:api,validation,image_copy,buffer_texture_copies:sample_count:*
6968
webgpu:api,validation,image_copy,buffer_texture_copies:texture_buffer_usages:*
7069
webgpu:api,validation,image_copy,buffer_texture_copies:device_mismatch:*
70+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyB2T";dimension="1d"
71+
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="r8uint";copyType="CopyT2B";dimension="1d"
7172
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyB2T";dimension="1d"
7273
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba32float";copyType="CopyT2B";dimension="1d"
7374
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="rgba8unorm";copyType="CopyB2T";dimension="2d"
@@ -81,7 +82,7 @@ fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and
8182
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyT2B";dimension="3d"
8283
fails-if(dx12) webgpu:api,validation,image_copy,buffer_texture_copies:offset_and_bytesPerRow:format="astc-4x4-unorm";copyType="CopyB2T";dimension="3d"
8384
//mix of PASS and FAIL: other subtests of offset_and_bytesPerRow. Related bugs:
84-
// https://github.com/gfx-rs/wgpu/issues/7946, https://github.com/gfx-rs/wgpu/issues/7947
85+
// https://github.com/gfx-rs/wgpu/issues/7946
8586
webgpu:api,validation,image_copy,layout_related:copy_end_overflows_u64:*
8687
// Fails with OOM in CI.
8788
fails-if(dx12) webgpu:api,validation,image_copy,layout_related:offset_alignment:*

wgpu-core/src/command/transfer.rs

Lines changed: 112 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use crate::{
2222
TextureInitTrackerAction,
2323
},
2424
resource::{
25-
MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess,
25+
Buffer, MissingBufferUsageError, MissingTextureUsageError, ParentDevice, RawResourceAccess,
2626
Texture, TextureErrorDimension,
2727
},
2828
snatch::SnatchGuard,
@@ -34,7 +34,7 @@ pub type TexelCopyBufferInfo = wgt::TexelCopyBufferInfo<BufferId>;
3434
pub type TexelCopyTextureInfo = wgt::TexelCopyTextureInfo<TextureId>;
3535
pub type CopyExternalImageDestInfo = wgt::CopyExternalImageDestInfo<TextureId>;
3636

37-
#[derive(Clone, Copy, Debug)]
37+
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
3838
pub enum CopySide {
3939
Source,
4040
Destination,
@@ -271,9 +271,11 @@ pub(crate) fn extract_texture_selector<T>(
271271
///
272272
/// Copied with some modifications from WebGPU standard.
273273
///
274-
/// If successful, returns a pair `(bytes, stride)`, where:
274+
/// If successful, returns a pair `(bytes, stride, is_contiguous)`, where:
275275
/// - `bytes` is the number of buffer bytes required for this copy, and
276276
/// - `stride` number of bytes between array layers.
277+
/// - `is_contiguous` is true if the linear texture data does not have padding
278+
/// between rows or between images.
277279
///
278280
/// [vltd]: https://gpuweb.github.io/gpuweb/#abstract-opdef-validating-linear-texture-data
279281
pub(crate) fn validate_linear_texture_data(
@@ -283,7 +285,7 @@ pub(crate) fn validate_linear_texture_data(
283285
buffer_size: BufferAddress,
284286
buffer_side: CopySide,
285287
copy_size: &Extent3d,
286-
) -> Result<(BufferAddress, BufferAddress), TransferError> {
288+
) -> Result<(BufferAddress, BufferAddress, bool), TransferError> {
287289
let wgt::BufferTextureCopyInfo {
288290
copy_width,
289291
copy_height,
@@ -298,14 +300,14 @@ pub(crate) fn validate_linear_texture_data(
298300
width_blocks: _,
299301
height_blocks,
300302

301-
row_bytes_dense: _,
303+
row_bytes_dense,
302304
row_stride_bytes,
303305

304306
image_stride_rows: _,
305307
image_stride_bytes,
306308

307309
image_rows_dense: _,
308-
image_bytes_dense: _,
310+
image_bytes_dense,
309311

310312
bytes_in_copy,
311313
} = layout.get_buffer_texture_copy_info(format, aspect, copy_size)?;
@@ -342,7 +344,10 @@ pub(crate) fn validate_linear_texture_data(
342344
});
343345
}
344346

345-
Ok((bytes_in_copy, image_stride_bytes))
347+
let is_contiguous = (row_stride_bytes == row_bytes_dense || !requires_multiple_rows)
348+
&& (image_stride_bytes == image_bytes_dense || !requires_multiple_images);
349+
350+
Ok((bytes_in_copy, image_stride_bytes, is_contiguous))
346351
}
347352

348353
/// Validation for texture/buffer copies.
@@ -680,6 +685,90 @@ fn handle_dst_texture_init(
680685
Ok(())
681686
}
682687

688+
/// Handle initialization tracking for a transfer's source or destination buffer.
689+
///
690+
/// Ensures that the transfer will not read from uninitialized memory, and updates
691+
/// the initialization state information to reflect the transfer.
692+
fn handle_buffer_init(
693+
cmd_buf_data: &mut CommandBufferMutable,
694+
info: &TexelCopyBufferInfo,
695+
buffer: &Arc<Buffer>,
696+
direction: CopySide,
697+
required_buffer_bytes_in_copy: BufferAddress,
698+
is_contiguous: bool,
699+
) {
700+
const ALIGN_SIZE: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT;
701+
const ALIGN_MASK: BufferAddress = wgt::COPY_BUFFER_ALIGNMENT - 1;
702+
703+
let start = info.layout.offset;
704+
let end = info.layout.offset + required_buffer_bytes_in_copy;
705+
if !is_contiguous || direction == CopySide::Source {
706+
// If the transfer will read the buffer, then the whole region needs to
707+
// be initialized.
708+
//
709+
// If the transfer will not write a contiguous region of the buffer,
710+
// then we need to make sure the padding areas are initialized. For now,
711+
// initialize the whole region, although this could be improved to
712+
// initialize only the necessary parts if doing so is likely to be
713+
// faster than initializing the whole thing.
714+
//
715+
// Adjust the start/end outwards to 4B alignment.
716+
let aligned_start = start & !ALIGN_MASK;
717+
let aligned_end = (end + ALIGN_MASK) & !ALIGN_MASK;
718+
cmd_buf_data.buffer_memory_init_actions.extend(
719+
buffer.initialization_status.read().create_action(
720+
buffer,
721+
aligned_start..aligned_end,
722+
MemoryInitKind::NeedsInitializedMemory,
723+
),
724+
);
725+
} else {
726+
// If the transfer will write a contiguous region of the buffer, then we
727+
// don't need to initialize that region.
728+
//
729+
// However, if the start and end are not 4B aligned, we need to make
730+
// sure that we don't end up trying to initialize non-4B-aligned regions
731+
// later.
732+
//
733+
// Adjust the start/end inwards to 4B alignment, we will handle the
734+
// first/last pieces differently.
735+
let aligned_start = (start + ALIGN_MASK) & !ALIGN_MASK;
736+
let aligned_end = end & !ALIGN_MASK;
737+
if aligned_start != start {
738+
cmd_buf_data.buffer_memory_init_actions.extend(
739+
buffer.initialization_status.read().create_action(
740+
buffer,
741+
aligned_start - ALIGN_SIZE..aligned_start,
742+
MemoryInitKind::NeedsInitializedMemory,
743+
),
744+
);
745+
}
746+
if aligned_start != aligned_end {
747+
cmd_buf_data.buffer_memory_init_actions.extend(
748+
buffer.initialization_status.read().create_action(
749+
buffer,
750+
aligned_start..aligned_end,
751+
MemoryInitKind::ImplicitlyInitialized,
752+
),
753+
);
754+
}
755+
if aligned_end != end {
756+
// It is possible that `aligned_end + ALIGN_SIZE > dst_buffer.size`,
757+
// because `dst_buffer.size` is the user-requested size, not the
758+
// final size of the buffer. The final size of the buffer is not
759+
// readily available, but was rounded up to COPY_BUFFER_ALIGNMENT,
760+
// so no overrun is possible.
761+
cmd_buf_data.buffer_memory_init_actions.extend(
762+
buffer.initialization_status.read().create_action(
763+
buffer,
764+
aligned_end..aligned_end + ALIGN_SIZE,
765+
MemoryInitKind::NeedsInitializedMemory,
766+
),
767+
);
768+
}
769+
}
770+
}
771+
683772
impl Global {
684773
pub fn command_encoder_copy_buffer_to_buffer(
685774
&self,
@@ -958,7 +1047,7 @@ impl Global {
9581047
true, // alignment required for buffer offset
9591048
)?;
9601049

961-
let (required_buffer_bytes_in_copy, bytes_per_array_layer) =
1050+
let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) =
9621051
validate_linear_texture_data(
9631052
&source.layout,
9641053
dst_texture.desc.format,
@@ -974,12 +1063,13 @@ impl Global {
9741063
.map_err(TransferError::from)?;
9751064
}
9761065

977-
cmd_buf_data.buffer_memory_init_actions.extend(
978-
src_buffer.initialization_status.read().create_action(
979-
&src_buffer,
980-
source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy),
981-
MemoryInitKind::NeedsInitializedMemory,
982-
),
1066+
handle_buffer_init(
1067+
cmd_buf_data,
1068+
source,
1069+
&src_buffer,
1070+
CopySide::Source,
1071+
required_buffer_bytes_in_copy,
1072+
is_contiguous,
9831073
);
9841074

9851075
let regions = (0..array_layer_count)
@@ -1084,7 +1174,7 @@ impl Global {
10841174
true, // alignment required for buffer offset
10851175
)?;
10861176

1087-
let (required_buffer_bytes_in_copy, bytes_per_array_layer) =
1177+
let (required_buffer_bytes_in_copy, bytes_per_array_layer, is_contiguous) =
10881178
validate_linear_texture_data(
10891179
&destination.layout,
10901180
src_texture.desc.format,
@@ -1140,13 +1230,13 @@ impl Global {
11401230
let dst_barrier =
11411231
dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
11421232

1143-
cmd_buf_data.buffer_memory_init_actions.extend(
1144-
dst_buffer.initialization_status.read().create_action(
1145-
&dst_buffer,
1146-
destination.layout.offset
1147-
..(destination.layout.offset + required_buffer_bytes_in_copy),
1148-
MemoryInitKind::ImplicitlyInitialized,
1149-
),
1233+
handle_buffer_init(
1234+
cmd_buf_data,
1235+
destination,
1236+
&dst_buffer,
1237+
CopySide::Destination,
1238+
required_buffer_bytes_in_copy,
1239+
is_contiguous,
11501240
);
11511241

11521242
let regions = (0..array_layer_count)

wgpu-core/src/device/queue.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -759,20 +759,21 @@ impl Queue {
759759
&destination,
760760
dst_base.aspect,
761761
&dst.desc,
762-
&data_layout,
762+
data_layout,
763763
false, // alignment not required for buffer offset or bytes per row
764764
)?;
765765

766766
// Note: `_source_bytes_per_array_layer` is ignored since we
767767
// have a staging copy, and it can have a different value.
768-
let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data(
769-
data_layout,
770-
dst.desc.format,
771-
destination.aspect,
772-
data.len() as wgt::BufferAddress,
773-
CopySide::Source,
774-
size,
775-
)?;
768+
let (required_bytes_in_copy, _source_bytes_per_array_layer, _) =
769+
validate_linear_texture_data(
770+
data_layout,
771+
dst.desc.format,
772+
destination.aspect,
773+
data.len() as wgt::BufferAddress,
774+
CopySide::Source,
775+
size,
776+
)?;
776777

777778
if dst.desc.format.is_depth_stencil_format() {
778779
self.device

0 commit comments

Comments
 (0)