Skip to content

[naga spv-out] Add f16 io polyfill #7884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913).
#### Naga

- Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950).
- Add f16 IO polyfill on the Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884).

#### DX12

Expand Down
17 changes: 16 additions & 1 deletion naga/src/back/spv/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ impl Writer {
}
};

body.push(Instruction::store(res_member.id, member_value_id, None));
self.store_io_with_f16_polyfill(body, res_member.id, member_value_id);

match res_member.built_in {
Some(crate::BuiltIn::Position { .. })
Expand Down Expand Up @@ -2313,6 +2313,21 @@ impl BlockContext<'_> {
match self.write_access_chain(pointer, block, access_type_adjustment)? {
ExpressionPointer::Ready { pointer_id } => {
let id = self.gen_id();

if self
.writer
.io_f16_polyfills
.get_f32_io_type(pointer_id)
.is_some()
{
let converted = self.writer.load_io_with_f16_polyfill(
&mut block.body,
pointer_id,
result_type_id,
);
return Ok(converted);
}

let atomic_space =
match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) {
crate::TypeInner::Pointer { base, space } => {
Expand Down
104 changes: 104 additions & 0 deletions naga/src/back/spv/f16_polyfill.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*!
This module provides functionality for polyfilling `f16` input/output variables
when the `StorageInputOutput16` capability is not available or disabled.

It works by:

1. Declaring `f16` I/O variables as `f32` in SPIR-V
2. Converting between `f16` and `f32` at runtime using `OpFConvert`
3. Maintaining mappings to track which variables need conversion
*/

use crate::back::spv::{Instruction, LocalType, NumericType, Word};
use alloc::vec::Vec;

/// Manages `f16` I/O polyfill state and operations.
///
/// Tracks whether `f16` I/O is emitted natively and, when it is not, which
/// I/O variables were registered with a substitute `f32` type so that loads
/// and stores through them can be wrapped in conversions.
///
/// Note that the derived `Default` leaves `use_native` as `false`, i.e. a
/// default-constructed value reports that `f16` I/O types need the polyfill.
#[derive(Default)]
pub(in crate::back::spv) struct F16IoPolyfill {
/// When `true`, `needs_polyfill` always answers `false`. Set from the
/// `use_storage_input_output_16` argument of `new`.
use_native: bool,
/// Maps the id passed to `register_io_var` (an I/O variable id) to the id of
/// the `f32` type that was registered alongside it.
io_var_to_f32_type: crate::FastHashMap<Word, Word>,
}

impl F16IoPolyfill {
pub fn new(use_storage_input_output_16: bool) -> Self {
Self {
use_native: use_storage_input_output_16,
io_var_to_f32_type: crate::FastHashMap::default(),
}
}

pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
use crate::{ScalarKind as Sk, TypeInner};

!self.use_native
&& match *ty_inner {
TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true,
TypeInner::Vector { scalar, .. }
if scalar.kind == Sk::Float && scalar.width == 2 =>
{
true
}
_ => false,
}
}

pub fn register_io_var(&mut self, variable_id: Word, f32_type_id: Word) {
self.io_var_to_f32_type.insert(variable_id, f32_type_id);
}

pub fn get_f32_io_type(&self, variable_id: Word) -> Option<Word> {
self.io_var_to_f32_type.get(&variable_id).copied()
}

pub fn emit_f16_to_f32_conversion(
f16_value_id: Word,
f32_type_id: Word,
converted_id: Word,
body: &mut Vec<Instruction>,
) {
body.push(Instruction::unary(
spirv::Op::FConvert,
f32_type_id,
converted_id,
f16_value_id,
));
}

pub fn emit_f32_to_f16_conversion(
f32_value_id: Word,
f16_type_id: Word,
converted_id: Word,
body: &mut Vec<Instruction>,
) {
body.push(Instruction::unary(
spirv::Op::FConvert,
f16_type_id,
converted_id,
f32_value_id,
));
}

pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option<LocalType> {
use crate::{ScalarKind as Sk, TypeInner};

match *ty_inner {
TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => {
Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32)))
}
TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => {
Some(LocalType::Numeric(NumericType::Vector {
size,
scalar: crate::Scalar::F32,
}))
}
_ => None,
}
}
}

impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
    /// Recycles the variable-to-type map while carrying the `use_native`
    /// setting over unchanged.
    fn recycle(self) -> Self {
        let Self {
            use_native,
            io_var_to_f32_type,
        } = self;

        Self {
            use_native,
            io_var_to_f32_type: crate::back::spv::recyclable::Recyclable::recycle(
                io_var_to_f32_type,
            ),
        }
    }
}
11 changes: 11 additions & 0 deletions naga/src/back/spv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation).
*/

mod block;
mod f16_polyfill;
mod helpers;
mod image;
mod index;
Expand Down Expand Up @@ -745,6 +746,7 @@ pub struct Writer {
bounds_check_policies: BoundsCheckPolicies,
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode,
force_loop_bounding: bool,
use_storage_input_output_16: bool,
void_type: Word,
//TODO: convert most of these into vectors, addressable by handle indices
lookup_type: crate::FastHashMap<LookupType, Word>,
Expand All @@ -771,6 +773,10 @@ pub struct Writer {

ray_get_committed_intersection_function: Option<Word>,
ray_get_candidate_intersection_function: Option<Word>,

/// F16 I/O polyfill manager for handling `f16` input/output variables
/// when `StorageInputOutput16` capability is not available.
io_f16_polyfills: f16_polyfill::F16IoPolyfill,
}

bitflags::bitflags! {
Expand Down Expand Up @@ -853,6 +859,10 @@ pub struct Options<'a> {
/// to think the number of iterations is bounded.
pub force_loop_bounding: bool,

/// Whether to use the `StorageInputOutput16` capability for `f16` shader I/O.
/// When false, `f16` I/O is polyfilled using `f32` types with conversions.
pub use_storage_input_output_16: bool,

pub debug_info: Option<DebugInfo<'a>>,
}

Expand All @@ -872,6 +882,7 @@ impl Default for Options<'_> {
bounds_check_policies: BoundsCheckPolicies::default(),
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill,
force_loop_bounding: true,
use_storage_input_output_16: true,
debug_info: None,
}
}
Expand Down
99 changes: 86 additions & 13 deletions naga/src/back/spv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ impl Writer {
bounds_check_policies: options.bounds_check_policies,
zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory,
force_loop_bounding: options.force_loop_bounding,
use_storage_input_output_16: options.use_storage_input_output_16,
void_type,
lookup_type: crate::FastHashMap::default(),
lookup_function: crate::FastHashMap::default(),
Expand All @@ -92,6 +93,9 @@ impl Writer {
temp_list: Vec::new(),
ray_get_committed_intersection_function: None,
ray_get_candidate_intersection_function: None,
io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new(
options.use_storage_input_output_16,
),
})
}

Expand Down Expand Up @@ -125,6 +129,7 @@ impl Writer {
bounds_check_policies: self.bounds_check_policies,
zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
force_loop_bounding: self.force_loop_bounding,
use_storage_input_output_16: self.use_storage_input_output_16,
capabilities_available: take(&mut self.capabilities_available),
binding_map: take(&mut self.binding_map),

Expand All @@ -151,6 +156,7 @@ impl Writer {
temp_list: take(&mut self.temp_list).recycle(),
ray_get_candidate_intersection_function: None,
ray_get_committed_intersection_function: None,
io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(),
};

*self = fresh;
Expand Down Expand Up @@ -726,10 +732,11 @@ impl Writer {
binding,
)?;
iface.varying_ids.push(varying_id);
let id = self.id_gen.next();
prelude
.body
.push(Instruction::load(argument_type_id, id, varying_id, None));
let id = self.load_io_with_f16_polyfill(
&mut prelude.body,
varying_id,
argument_type_id,
);

if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
local_invocation_id = Some(id);
Expand All @@ -754,10 +761,8 @@ impl Writer {
binding,
)?;
iface.varying_ids.push(varying_id);
let id = self.id_gen.next();
prelude
.body
.push(Instruction::load(type_id, id, varying_id, None));
let id =
self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id);
constituent_ids.push(id);

if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
Expand Down Expand Up @@ -1220,8 +1225,10 @@ impl Writer {
.insert(spirv::Capability::StorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::StorageInputOutput16);
if self.use_storage_input_output_16 {
self.capabilities_used
.insert(spirv::Capability::StorageInputOutput16);
}
}
Instruction::type_float(id, bits)
}
Expand Down Expand Up @@ -1905,8 +1912,26 @@ impl Writer {
ty: Handle<crate::Type>,
binding: &crate::Binding,
) -> Result<Word, Error> {
use crate::TypeInner;

let id = self.id_gen.next();
let pointer_type_id = self.get_handle_pointer_type_id(ty, class);
let ty_inner = &ir_module.types[ty].inner;
let needs_polyfill = self.needs_f16_polyfill(ty_inner);

let pointer_type_id = if needs_polyfill {
let f32_value_local =
super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
.expect("needs_polyfill returned true but create_polyfill_type returned None");

let f32_type_id = self.get_localtype_id(f32_value_local);
let ptr_id = self.get_pointer_type_id(f32_type_id, class);
self.io_f16_polyfills.register_io_var(id, f32_type_id);

ptr_id
} else {
self.get_handle_pointer_type_id(ty, class)
};

Instruction::variable(pointer_type_id, id, class, None)
.to_words(&mut self.logical_layout.declarations);

Expand Down Expand Up @@ -2089,8 +2114,9 @@ impl Writer {
// > shader, must be decorated Flat
if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment {
let is_flat = match ir_module.types[ty].inner {
crate::TypeInner::Scalar(scalar)
| crate::TypeInner::Vector { scalar, .. } => match scalar.kind {
TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar
.kind
{
Sk::Uint | Sk::Sint | Sk::Bool => true,
Sk::Float => false,
Sk::AbstractInt | Sk::AbstractFloat => {
Expand All @@ -2112,6 +2138,49 @@ impl Writer {
Ok(id)
}

/// Loads from the I/O variable `varying_id`, appending the instructions to `body`.
///
/// If the variable is registered with the `f16` polyfill, it is loaded as its
/// registered `f32` type and then converted to `target_type_id`; otherwise it
/// is loaded directly as `target_type_id`. Either way the returned id names a
/// value of type `target_type_id`.
pub(super) fn load_io_with_f16_polyfill(
    &mut self,
    body: &mut Vec<Instruction>,
    varying_id: Word,
    target_type_id: Word,
) -> Word {
    // The load result id is allocated up front in both cases so that id
    // numbering does not depend on which path is taken first.
    let loaded_id = self.id_gen.next();

    match self.io_f16_polyfills.get_f32_io_type(varying_id) {
        None => {
            body.push(Instruction::load(
                target_type_id,
                loaded_id,
                varying_id,
                None,
            ));
            loaded_id
        }
        Some(f32_type_id) => {
            body.push(Instruction::load(f32_type_id, loaded_id, varying_id, None));
            let narrowed_id = self.id_gen.next();
            super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
                loaded_id,
                target_type_id,
                narrowed_id,
                body,
            );
            narrowed_id
        }
    }
}

/// Stores `value_id` into the I/O variable `varying_id`, appending the
/// instructions to `body`.
///
/// If the variable is registered with the `f16` polyfill, the value is first
/// converted to the registered `f32` type and the converted value is stored;
/// otherwise `value_id` is stored as-is.
pub(super) fn store_io_with_f16_polyfill(
    &mut self,
    body: &mut Vec<Instruction>,
    varying_id: Word,
    value_id: Word,
) {
    let stored_id = match self.io_f16_polyfills.get_f32_io_type(varying_id) {
        Some(f32_type_id) => {
            let widened_id = self.id_gen.next();
            super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
                value_id,
                f32_type_id,
                widened_id,
                body,
            );
            widened_id
        }
        None => value_id,
    };

    body.push(Instruction::store(varying_id, stored_id, None));
}

fn write_global_variable(
&mut self,
ir_module: &crate::Module,
Expand Down Expand Up @@ -2585,6 +2654,10 @@ impl Writer {
self.decorate(id, spirv::Decoration::NonUniform, &[]);
Ok(())
}

/// Reports whether an I/O value of type `ty_inner` has to go through the
/// `f16` polyfill, as decided by this writer's polyfill manager.
pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
    let polyfills = &self.io_f16_polyfills;
    super::f16_polyfill::F16IoPolyfill::needs_polyfill(polyfills, ty_inner)
}
}

#[test]
Expand Down
13 changes: 13 additions & 0 deletions naga/tests/in/wgsl/f16-native.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
targets = "SPIRV"
god_mode = true

[spv]
debug = true
version = [1, 1]
use_storage_input_output_16 = true
capabilities = ["Float16"]

[bounds_check_policies]
index = "ReadZeroSkipWrite"
buffer = "ReadZeroSkipWrite"
image = "ReadZeroSkipWrite"
Loading
Loading