Skip to content

Commit 698316c

Browse files
fix(dx12): align tex. <-> buf. copies via intermediate buffer if !UnrestrictedBufferTextureCopyPitchSupported
1 parent fec2dba commit 698316c

File tree

6 files changed

+166
-24
lines changed

6 files changed

+166
-24
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ Naga now requires that no type be larger than 1 GB. This limit may be lowered in
6565

6666
### Bug Fixes
6767

68+
#### DX12
69+
70+
- Align copies b/w textures and buffers via a single intermediate buffer per copy when `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported` is `false`. By @ErichDonGubler in [#7721](https://github.com/gfx-rs/wgpu/pull/7721).
71+
6872
#### Vulkan
6973

7074
Fix `STATUS_HEAP_CORRUPTION` crash when concurrently calling `create_sampler`. By @atlv24 in [#8043](https://github.com/gfx-rs/wgpu/pull/8043).

tests/tests/wgpu-gpu/regression/issue_6827.rs

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,7 @@ static TEST_SCATTER: GpuTestConfiguration = GpuTestConfiguration::new()
2525
.expect_fail(FailureCase::backend_adapter(
2626
wgpu::Backends::METAL,
2727
"Apple Paravirtual device", // CI on M1
28-
))
29-
.expect_fail(
30-
// Unfortunately this depends on if `D3D12_FEATURE_DATA_D3D12_OPTIONS13.UnrestrictedBufferTextureCopyPitchSupported`
31-
// is true, which we have no way to encode. This reproduces in CI though, so not too worried about it.
32-
FailureCase::backend(wgpu::Backends::DX12)
33-
.flaky()
34-
.validation_error(
35-
"D3D12_PLACED_SUBRESOURCE_FOOTPRINT::Offset must be a multiple of 512",
36-
)
37-
.panic("GraphicsCommandList::close failed: The parameter is incorrect"),
38-
),
28+
)),
3929
)
4030
.run_async(|ctx| async move { run_test(ctx, true).await });
4131

wgpu-hal/src/dx12/adapter.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -317,8 +317,7 @@ impl super::Adapter {
317317
suballocation_supported: !info.name.contains("Iris(R) Xe"),
318318
shader_model,
319319
max_sampler_descriptor_heap_size,
320-
_unrestricted_buffer_texture_copy_pitch_supported:
321-
unrestricted_buffer_texture_copy_pitch_supported,
320+
unrestricted_buffer_texture_copy_pitch_supported,
322321
};
323322

324323
// Theoretically vram limited, but in practice 2^20 is the limit

wgpu-hal/src/dx12/command.rs

Lines changed: 141 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::{
1414
dxgi::{name::ObjectExt, result::HResult as _},
1515
},
1616
dx12::borrow_interface_temporarily,
17-
AccelerationStructureEntries,
17+
AccelerationStructureEntries, CommandEncoder as _,
1818
};
1919

2020
fn make_box(origin: &wgt::Origin3d, size: &crate::CopyExtent) -> Direct3D12::D3D12_BOX {
@@ -312,6 +312,78 @@ impl super::CommandEncoder {
312312
}
313313
}
314314
}
315+
316+
unsafe fn buf_tex_intermediate<T>(
317+
&mut self,
318+
region: crate::BufferTextureCopy,
319+
tex_fmt: wgt::TextureFormat,
320+
copy_op: impl FnOnce(&mut Self, &super::Buffer, wgt::BufferSize, crate::BufferTextureCopy) -> T,
321+
) -> Option<(T, super::Buffer)> {
322+
let size = {
323+
let copy_info = region.buffer_layout.get_buffer_texture_copy_info(
324+
tex_fmt,
325+
region.texture_base.aspect.map(),
326+
&region.size.into(),
327+
);
328+
copy_info.unwrap().bytes_in_copy
329+
};
330+
331+
let size = wgt::BufferSize::new(size)?;
332+
333+
let buffer = {
334+
let (resource, allocation) =
335+
super::suballocation::DeviceAllocationContext::from(&*self)
336+
.create_buffer(&crate::BufferDescriptor {
337+
label: None,
338+
size: size.get(),
339+
usage: wgt::BufferUses::COPY_SRC | wgt::BufferUses::COPY_DST,
340+
memory_flags: crate::MemoryFlags::empty(),
341+
})
342+
.expect(concat!(
343+
"internal error: ",
344+
"failed to allocate intermediate buffer ",
345+
"for offset alignment"
346+
));
347+
super::Buffer {
348+
resource,
349+
size: size.get(),
350+
allocation,
351+
}
352+
};
353+
354+
let mut region = region;
355+
region.buffer_layout.offset = 0;
356+
357+
unsafe {
358+
self.transition_buffers(
359+
[crate::BufferBarrier {
360+
buffer: &buffer,
361+
usage: crate::StateTransition {
362+
from: wgt::BufferUses::empty(),
363+
to: wgt::BufferUses::COPY_DST,
364+
},
365+
}]
366+
.into_iter(),
367+
)
368+
};
369+
370+
let t = copy_op(self, &buffer, size, region);
371+
372+
unsafe {
373+
self.transition_buffers(
374+
[crate::BufferBarrier {
375+
buffer: &buffer,
376+
usage: crate::StateTransition {
377+
from: wgt::BufferUses::COPY_DST,
378+
to: wgt::BufferUses::COPY_SRC,
379+
},
380+
}]
381+
.into_iter(),
382+
)
383+
};
384+
385+
Some((t, buffer))
386+
}
315387
}
316388

317389
impl crate::CommandEncoder for super::CommandEncoder {
@@ -366,6 +438,7 @@ impl crate::CommandEncoder for super::CommandEncoder {
366438
Ok(super::CommandBuffer { raw })
367439
}
368440
unsafe fn reset_all<I: Iterator<Item = super::CommandBuffer>>(&mut self, command_buffers: I) {
441+
self.intermediate_copy_bufs.clear();
369442
for cmd_buf in command_buffers {
370443
self.free_lists.push(cmd_buf.raw);
371444
}
@@ -612,31 +685,61 @@ impl crate::CommandEncoder for super::CommandEncoder {
612685
) where
613686
T: Iterator<Item = crate::BufferTextureCopy>,
614687
{
615-
for r in regions {
688+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
689+
690+
for naive_copy_region in regions {
691+
let is_offset_aligned = naive_copy_region.buffer_layout.offset % offset_alignment == 0;
692+
let (final_copy_region, src) = if is_offset_aligned {
693+
(naive_copy_region, src)
694+
} else {
695+
let Some((intermediate_to_dst_region, intermediate_buf)) = (unsafe {
696+
let src_offset = naive_copy_region.buffer_layout.offset;
697+
self.buf_tex_intermediate(
698+
naive_copy_region,
699+
dst.format,
700+
|this, buf, size, intermediate_to_dst_region| {
701+
let layout = crate::BufferCopy {
702+
src_offset,
703+
dst_offset: 0,
704+
size,
705+
};
706+
this.copy_buffer_to_buffer(src, buf, [layout].into_iter());
707+
intermediate_to_dst_region
708+
},
709+
)
710+
}) else {
711+
continue;
712+
};
713+
self.intermediate_copy_bufs.push(intermediate_buf);
714+
let intermediate_buf = self.intermediate_copy_bufs.last().unwrap();
715+
(intermediate_to_dst_region, intermediate_buf)
716+
};
717+
616718
let list = self.list.as_ref().unwrap();
617719

618720
let src_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
619721
pResource: unsafe { borrow_interface_temporarily(&src.resource) },
620722
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
621723
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
622-
PlacedFootprint: r.to_subresource_footprint(dst.format),
724+
PlacedFootprint: final_copy_region.to_subresource_footprint(dst.format),
623725
},
624726
};
625727
let dst_location = Direct3D12::D3D12_TEXTURE_COPY_LOCATION {
626728
pResource: unsafe { borrow_interface_temporarily(&dst.resource) },
627729
Type: Direct3D12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
628730
Anonymous: Direct3D12::D3D12_TEXTURE_COPY_LOCATION_0 {
629-
SubresourceIndex: dst.calc_subresource_for_copy(&r.texture_base),
731+
SubresourceIndex: dst
732+
.calc_subresource_for_copy(&final_copy_region.texture_base),
630733
},
631734
};
632735

633-
let src_box = make_box(&wgt::Origin3d::ZERO, &r.size);
736+
let src_box = make_box(&wgt::Origin3d::ZERO, &final_copy_region.size);
634737
unsafe {
635738
list.CopyTextureRegion(
636739
&dst_location,
637-
r.texture_base.origin.x,
638-
r.texture_base.origin.y,
639-
r.texture_base.origin.z,
740+
final_copy_region.texture_base.origin.x,
741+
final_copy_region.texture_base.origin.y,
742+
final_copy_region.texture_base.origin.z,
640743
&src_location,
641744
Some(&src_box),
642745
)
@@ -680,8 +783,37 @@ impl crate::CommandEncoder for super::CommandEncoder {
680783
};
681784
};
682785

786+
let offset_alignment = self.shared.private_caps.texture_data_placement_alignment();
787+
683788
for r in regions {
684-
copy_aligned(this, src, dst, r);
789+
let is_offset_aligned = r.buffer_layout.offset % offset_alignment == 0;
790+
if is_offset_aligned {
791+
copy_aligned(self, src, dst, r)
792+
} else {
793+
let orig_offset = r.buffer_layout.offset;
794+
let Some((intermediate_to_dst_region, src)) = (unsafe {
795+
self.buf_tex_intermediate(
796+
r,
797+
src.format,
798+
|this, buf, size, intermediate_region| {
799+
copy_aligned(this, src, buf, intermediate_region);
800+
crate::BufferCopy {
801+
src_offset: orig_offset,
802+
dst_offset: 0,
803+
size,
804+
}
805+
},
806+
)
807+
}) else {
808+
continue;
809+
};
810+
811+
unsafe {
812+
self.copy_buffer_to_buffer(&src, dst, [intermediate_to_dst_region].into_iter());
813+
}
814+
815+
self.intermediate_copy_bufs.push(src);
816+
};
685817
}
686818
}
687819

wgpu-hal/src/dx12/device.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ impl crate::Device for super::Device {
778778
mem_allocator: self.mem_allocator.clone(),
779779
rtv_pool: Arc::clone(&self.rtv_pool),
780780
temp_rtv_handles: Vec::new(),
781+
intermediate_copy_bufs: Vec::new(),
781782
null_rtv_handle: self.null_rtv_handle,
782783
list: None,
783784
free_lists: Vec::new(),

wgpu-hal/src/dx12/mod.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,11 @@ use windows::{
9595
core::{Free, Interface},
9696
Win32::{
9797
Foundation,
98-
Graphics::{Direct3D, Direct3D12, DirectComposition, Dxgi},
98+
Graphics::{
99+
Direct3D,
100+
Direct3D12::{self, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT},
101+
DirectComposition, Dxgi,
102+
},
99103
System::Threading,
100104
},
101105
};
@@ -581,7 +585,17 @@ struct PrivateCapabilities {
581585
suballocation_supported: bool,
582586
shader_model: naga::back::hlsl::ShaderModel,
583587
max_sampler_descriptor_heap_size: u32,
584-
_unrestricted_buffer_texture_copy_pitch_supported: bool,
588+
unrestricted_buffer_texture_copy_pitch_supported: bool,
589+
}
590+
591+
impl PrivateCapabilities {
592+
fn texture_data_placement_alignment(&self) -> u64 {
593+
if self.unrestricted_buffer_texture_copy_pitch_supported {
594+
4
595+
} else {
596+
D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT.into()
597+
}
598+
}
585599
}
586600

587601
#[derive(Default)]
@@ -817,6 +831,8 @@ pub struct CommandEncoder {
817831
rtv_pool: Arc<Mutex<descriptor::CpuPool>>,
818832
temp_rtv_handles: Vec<descriptor::Handle>,
819833

834+
intermediate_copy_bufs: Vec<Buffer>,
835+
820836
null_rtv_handle: descriptor::Handle,
821837
list: Option<Direct3D12::ID3D12GraphicsCommandList>,
822838
free_lists: Vec<Direct3D12::ID3D12GraphicsCommandList>,

0 commit comments

Comments
 (0)