Skip to content

Commit 4b5e38a

Browse files
authored
[naga spv-out] Add f16 io polyfill (#7884)
1 parent 2996c92 commit 4b5e38a

File tree

14 files changed

+2106
-19
lines changed

14 files changed

+2106
-19
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ This allows using precompiled shaders without manually checking which backend's
104104

105105
- Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950).
106106
- If the shader source contains control characters, Naga now replaces them with U+FFFD ("replacement character") in diagnostic output. By @andyleiserson in [#8049](https://github.com/gfx-rs/wgpu/pull/8049).
107+
- Add an `f16` I/O polyfill to the Vulkan backend so that `SHADER_F16` can be used without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884).
107108

108109
#### DX12
109110

naga/src/back/spv/block.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ impl Writer {
237237
}
238238
};
239239

240-
body.push(Instruction::store(res_member.id, member_value_id, None));
240+
self.store_io_with_f16_polyfill(body, res_member.id, member_value_id);
241241

242242
match res_member.built_in {
243243
Some(crate::BuiltIn::Position { .. })

naga/src/back/spv/f16_polyfill.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*!
2+
This module provides functionality for polyfilling `f16` input/output variables
3+
when the `StorageInputOutput16` capability is not available or disabled.
4+
5+
It works by:
6+
7+
1. Declaring `f16` I/O variables as `f32` in SPIR-V
8+
2. Converting between `f16` and `f32` at runtime using `OpFConvert`
9+
3. Maintaining mappings to track which variables need conversion
10+
*/
11+
12+
use crate::back::spv::{Instruction, LocalType, NumericType, Word};
13+
use alloc::vec::Vec;
14+
15+
/// Manages `f16` I/O polyfill state and operations.
16+
#[derive(Default)]
17+
pub(in crate::back::spv) struct F16IoPolyfill {
18+
use_native: bool,
19+
io_var_to_f32_type: crate::FastHashMap<Word, Word>,
20+
}
21+
22+
impl F16IoPolyfill {
23+
pub fn new(use_storage_input_output_16: bool) -> Self {
24+
Self {
25+
use_native: use_storage_input_output_16,
26+
io_var_to_f32_type: crate::FastHashMap::default(),
27+
}
28+
}
29+
30+
pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
31+
use crate::{ScalarKind as Sk, TypeInner};
32+
33+
!self.use_native
34+
&& match *ty_inner {
35+
TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true,
36+
TypeInner::Vector { scalar, .. }
37+
if scalar.kind == Sk::Float && scalar.width == 2 =>
38+
{
39+
true
40+
}
41+
_ => false,
42+
}
43+
}
44+
45+
pub fn register_io_var(&mut self, variable_id: Word, f32_type_id: Word) {
46+
self.io_var_to_f32_type.insert(variable_id, f32_type_id);
47+
}
48+
49+
pub fn get_f32_io_type(&self, variable_id: Word) -> Option<Word> {
50+
self.io_var_to_f32_type.get(&variable_id).copied()
51+
}
52+
53+
pub fn emit_f16_to_f32_conversion(
54+
f16_value_id: Word,
55+
f32_type_id: Word,
56+
converted_id: Word,
57+
body: &mut Vec<Instruction>,
58+
) {
59+
body.push(Instruction::unary(
60+
spirv::Op::FConvert,
61+
f32_type_id,
62+
converted_id,
63+
f16_value_id,
64+
));
65+
}
66+
67+
pub fn emit_f32_to_f16_conversion(
68+
f32_value_id: Word,
69+
f16_type_id: Word,
70+
converted_id: Word,
71+
body: &mut Vec<Instruction>,
72+
) {
73+
body.push(Instruction::unary(
74+
spirv::Op::FConvert,
75+
f16_type_id,
76+
converted_id,
77+
f32_value_id,
78+
));
79+
}
80+
81+
pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option<LocalType> {
82+
use crate::{ScalarKind as Sk, TypeInner};
83+
84+
match *ty_inner {
85+
TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => {
86+
Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32)))
87+
}
88+
TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => {
89+
Some(LocalType::Numeric(NumericType::Vector {
90+
size,
91+
scalar: crate::Scalar::F32,
92+
}))
93+
}
94+
_ => None,
95+
}
96+
}
97+
}
98+
99+
impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
    /// Recycles the variable map and carries the `use_native` flag over
    /// unchanged.
    fn recycle(self) -> Self {
        let Self {
            use_native,
            io_var_to_f32_type,
        } = self;

        Self {
            use_native,
            io_var_to_f32_type: io_var_to_f32_type.recycle(),
        }
    }
}

naga/src/back/spv/mod.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation).
55
*/
66

77
mod block;
8+
mod f16_polyfill;
89
mod helpers;
910
mod image;
1011
mod index;
@@ -745,6 +746,7 @@ pub struct Writer {
745746
bounds_check_policies: BoundsCheckPolicies,
746747
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode,
747748
force_loop_bounding: bool,
749+
use_storage_input_output_16: bool,
748750
void_type: Word,
749751
//TODO: convert most of these into vectors, addressable by handle indices
750752
lookup_type: crate::FastHashMap<LookupType, Word>,
@@ -771,6 +773,10 @@ pub struct Writer {
771773

772774
ray_get_committed_intersection_function: Option<Word>,
773775
ray_get_candidate_intersection_function: Option<Word>,
776+
777+
/// F16 I/O polyfill manager for handling `f16` input/output variables
778+
/// when `StorageInputOutput16` capability is not available.
779+
io_f16_polyfills: f16_polyfill::F16IoPolyfill,
774780
}
775781

776782
bitflags::bitflags! {
@@ -853,6 +859,10 @@ pub struct Options<'a> {
853859
/// to think the number of iterations is bounded.
854860
pub force_loop_bounding: bool,
855861

862+
/// Whether to use the `StorageInputOutput16` capability for `f16` shader I/O.
863+
/// When false, `f16` I/O is polyfilled using `f32` types with conversions.
864+
pub use_storage_input_output_16: bool,
865+
856866
pub debug_info: Option<DebugInfo<'a>>,
857867
}
858868

@@ -872,6 +882,7 @@ impl Default for Options<'_> {
872882
bounds_check_policies: BoundsCheckPolicies::default(),
873883
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill,
874884
force_loop_bounding: true,
885+
use_storage_input_output_16: true,
875886
debug_info: None,
876887
}
877888
}

naga/src/back/spv/writer.rs

Lines changed: 86 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ impl Writer {
7878
bounds_check_policies: options.bounds_check_policies,
7979
zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory,
8080
force_loop_bounding: options.force_loop_bounding,
81+
use_storage_input_output_16: options.use_storage_input_output_16,
8182
void_type,
8283
lookup_type: crate::FastHashMap::default(),
8384
lookup_function: crate::FastHashMap::default(),
@@ -92,6 +93,9 @@ impl Writer {
9293
temp_list: Vec::new(),
9394
ray_get_committed_intersection_function: None,
9495
ray_get_candidate_intersection_function: None,
96+
io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new(
97+
options.use_storage_input_output_16,
98+
),
9599
})
96100
}
97101

@@ -125,6 +129,7 @@ impl Writer {
125129
bounds_check_policies: self.bounds_check_policies,
126130
zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
127131
force_loop_bounding: self.force_loop_bounding,
132+
use_storage_input_output_16: self.use_storage_input_output_16,
128133
capabilities_available: take(&mut self.capabilities_available),
129134
binding_map: take(&mut self.binding_map),
130135

@@ -151,6 +156,7 @@ impl Writer {
151156
temp_list: take(&mut self.temp_list).recycle(),
152157
ray_get_candidate_intersection_function: None,
153158
ray_get_committed_intersection_function: None,
159+
io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(),
154160
};
155161

156162
*self = fresh;
@@ -726,10 +732,11 @@ impl Writer {
726732
binding,
727733
)?;
728734
iface.varying_ids.push(varying_id);
729-
let id = self.id_gen.next();
730-
prelude
731-
.body
732-
.push(Instruction::load(argument_type_id, id, varying_id, None));
735+
let id = self.load_io_with_f16_polyfill(
736+
&mut prelude.body,
737+
varying_id,
738+
argument_type_id,
739+
);
733740

734741
if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
735742
local_invocation_id = Some(id);
@@ -754,10 +761,8 @@ impl Writer {
754761
binding,
755762
)?;
756763
iface.varying_ids.push(varying_id);
757-
let id = self.id_gen.next();
758-
prelude
759-
.body
760-
.push(Instruction::load(type_id, id, varying_id, None));
764+
let id =
765+
self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id);
761766
constituent_ids.push(id);
762767

763768
if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
@@ -1220,8 +1225,10 @@ impl Writer {
12201225
.insert(spirv::Capability::StorageBuffer16BitAccess);
12211226
self.capabilities_used
12221227
.insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
1223-
self.capabilities_used
1224-
.insert(spirv::Capability::StorageInputOutput16);
1228+
if self.use_storage_input_output_16 {
1229+
self.capabilities_used
1230+
.insert(spirv::Capability::StorageInputOutput16);
1231+
}
12251232
}
12261233
Instruction::type_float(id, bits)
12271234
}
@@ -1905,8 +1912,26 @@ impl Writer {
19051912
ty: Handle<crate::Type>,
19061913
binding: &crate::Binding,
19071914
) -> Result<Word, Error> {
1915+
use crate::TypeInner;
1916+
19081917
let id = self.id_gen.next();
1909-
let pointer_type_id = self.get_handle_pointer_type_id(ty, class);
1918+
let ty_inner = &ir_module.types[ty].inner;
1919+
let needs_polyfill = self.needs_f16_polyfill(ty_inner);
1920+
1921+
let pointer_type_id = if needs_polyfill {
1922+
let f32_value_local =
1923+
super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
1924+
.expect("needs_polyfill returned true but create_polyfill_type returned None");
1925+
1926+
let f32_type_id = self.get_localtype_id(f32_value_local);
1927+
let ptr_id = self.get_pointer_type_id(f32_type_id, class);
1928+
self.io_f16_polyfills.register_io_var(id, f32_type_id);
1929+
1930+
ptr_id
1931+
} else {
1932+
self.get_handle_pointer_type_id(ty, class)
1933+
};
1934+
19101935
Instruction::variable(pointer_type_id, id, class, None)
19111936
.to_words(&mut self.logical_layout.declarations);
19121937

@@ -2089,8 +2114,9 @@ impl Writer {
20892114
// > shader, must be decorated Flat
20902115
if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment {
20912116
let is_flat = match ir_module.types[ty].inner {
2092-
crate::TypeInner::Scalar(scalar)
2093-
| crate::TypeInner::Vector { scalar, .. } => match scalar.kind {
2117+
TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar
2118+
.kind
2119+
{
20942120
Sk::Uint | Sk::Sint | Sk::Bool => true,
20952121
Sk::Float => false,
20962122
Sk::AbstractInt | Sk::AbstractFloat => {
@@ -2112,6 +2138,49 @@ impl Writer {
21122138
Ok(id)
21132139
}
21142140

2141+
/// Load an IO variable, converting from `f32` to `f16` if polyfill is active.
2142+
/// Returns the id of the loaded value matching `target_type_id`.
2143+
pub(super) fn load_io_with_f16_polyfill(
2144+
&mut self,
2145+
body: &mut Vec<Instruction>,
2146+
varying_id: Word,
2147+
target_type_id: Word,
2148+
) -> Word {
2149+
let tmp = self.id_gen.next();
2150+
if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) {
2151+
body.push(Instruction::load(f32_ty, tmp, varying_id, None));
2152+
let converted = self.id_gen.next();
2153+
super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
2154+
tmp,
2155+
target_type_id,
2156+
converted,
2157+
body,
2158+
);
2159+
converted
2160+
} else {
2161+
body.push(Instruction::load(target_type_id, tmp, varying_id, None));
2162+
tmp
2163+
}
2164+
}
2165+
2166+
/// Store an IO variable, converting from `f16` to `f32` if polyfill is active.
2167+
pub(super) fn store_io_with_f16_polyfill(
2168+
&mut self,
2169+
body: &mut Vec<Instruction>,
2170+
varying_id: Word,
2171+
value_id: Word,
2172+
) {
2173+
if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) {
2174+
let converted = self.id_gen.next();
2175+
super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
2176+
value_id, f32_ty, converted, body,
2177+
);
2178+
body.push(Instruction::store(varying_id, converted, None));
2179+
} else {
2180+
body.push(Instruction::store(varying_id, value_id, None));
2181+
}
2182+
}
2183+
21152184
fn write_global_variable(
21162185
&mut self,
21172186
ir_module: &crate::Module,
@@ -2585,6 +2654,10 @@ impl Writer {
25852654
self.decorate(id, spirv::Decoration::NonUniform, &[]);
25862655
Ok(())
25872656
}
2657+
2658+
/// Returns whether the writer's `f16` I/O polyfill state reports that a
/// variable of type `ty_inner` needs polyfilling.
pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
    let polyfills = &self.io_f16_polyfills;
    polyfills.needs_polyfill(ty_inner)
}
25882661
}
25892662

25902663
#[test]

naga/tests/in/wgsl/f16-native.toml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
targets = "SPIRV"
2+
god_mode = true
3+
4+
[spv]
5+
debug = true
6+
version = [1, 1]
7+
use_storage_input_output_16 = true
8+
capabilities = ["Float16"]
9+
10+
[bounds_check_policies]
11+
index = "ReadZeroSkipWrite"
12+
buffer = "ReadZeroSkipWrite"
13+
image = "ReadZeroSkipWrite"

0 commit comments

Comments
 (0)