Skip to content

[naga spv-out] Add f16 io polyfill #7884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ Bottom level categories:
- Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643).
- Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683).
- Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658)
- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884)

#### General

Expand Down
24 changes: 12 additions & 12 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 29 additions & 1 deletion naga/src/back/spv/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,18 @@ impl Writer {
}
};

body.push(Instruction::store(res_member.id, member_value_id, None));
if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
let converted = self.id_gen.next();
super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
member_value_id,
f32_ty,
converted,
body,
);
body.push(Instruction::store(res_member.id, converted, None));
} else {
body.push(Instruction::store(res_member.id, member_value_id, None));
}

match res_member.built_in {
Some(crate::BuiltIn::Position { .. })
Expand Down Expand Up @@ -2313,6 +2324,23 @@ impl BlockContext<'_> {
match self.write_access_chain(pointer, block, access_type_adjustment)? {
ExpressionPointer::Ready { pointer_id } => {
let id = self.gen_id();

if let Some((f32_ty, _)) =
self.writer.io_f16_polyfills.get_polyfill_info(pointer_id)
{
block
.body
.push(Instruction::load(f32_ty, id, pointer_id, None));
let converted = self.gen_id();
super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
id,
result_type_id,
converted,
&mut block.body,
);
return Ok(converted);
}

let atomic_space =
match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) {
crate::TypeInner::Pointer { base, space } => {
Expand Down
104 changes: 104 additions & 0 deletions naga/src/back/spv/f16_polyfill.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*!
This module provides polyfills for f16 input/output variables
when the StorageInputOutput16 capability is not available or disabled.

It works by:
1. Declaring f16 I/O variables as f32 in SPIR-V
2. Converting between f16 and f32 at runtime using OpFConvert
3. Maintaining mappings to track which variables need conversion
*/

use crate::back::spv::{Instruction, LocalType, NumericType, Word};
use alloc::vec::Vec;

/// Manages f16 I/O polyfill state and operations.
///
/// Holds the flag that decides whether polyfilling is active at all, plus a
/// table of the variables that were registered for conversion. The derived
/// `Default` leaves `use_native` as `false` and the table empty.
#[derive(Default)]
pub(super) struct F16IoPolyfill {
/// Set from the `use_storage_input_output_16` argument of `new`.
/// When `true`, `needs_polyfill` reports `false` for every type.
use_native: bool,
/// Registered variables, keyed by the variable id passed to
/// `register_variable`. Each value is the `(f32_type_id, f16_type_id)`
/// pair supplied at registration time.
variable_map: crate::FastHashMap<Word, (Word, Word)>,
}

impl F16IoPolyfill {
pub fn new(use_storage_input_output_16: bool) -> Self {
Self {
use_native: use_storage_input_output_16,
variable_map: crate::FastHashMap::default(),
}
}

/// Reports whether a value of type `ty_inner` has to go through the polyfill.
///
/// Returns `true` only when native handling is disabled and `ty_inner` is a
/// scalar or a vector whose component is a 2-byte-wide float. Every other
/// type, and every type while native handling is enabled, yields `false`.
pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
    use crate::{ScalarKind as Sk, TypeInner};

    if self.use_native {
        return false;
    }

    // Scalars and vectors are the only shapes considered; pull out the
    // component scalar so both are checked by the same condition.
    let component = match *ty_inner {
        TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => scalar,
        _ => return false,
    };

    component.kind == Sk::Float && component.width == 2
}

/// Records `variable_id` as a polyfilled variable.
///
/// The `(f32_type_id, f16_type_id)` pair is stored under `variable_id`,
/// replacing any pair previously registered for the same id.
pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) {
    let type_ids = (f32_type_id, f16_type_id);
    self.variable_map.insert(variable_id, type_ids);
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: I noticed that f16_type_id is never used when recalled via get_polyfill_info. Is this a bug, or just some data that can be eliminated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I've cleaned up the redundant data


/// Looks up the type ids registered for `variable_id`.
///
/// Returns `Some((f32_type_id, f16_type_id))` as given to
/// [`Self::register_variable`], or `None` if the id was never registered.
pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> {
    let &(f32_type_id, f16_type_id) = self.variable_map.get(&variable_id)?;
    Some((f32_type_id, f16_type_id))
}

/// Appends an `OpFConvert` instruction to `body` that converts the value
/// `f16_value_id` to the type `f32_type_id`, producing `converted_id`.
///
/// The caller allocates `converted_id`; nothing else in `body` is touched.
pub fn emit_f16_to_f32_conversion(
    f16_value_id: Word,
    f32_type_id: Word,
    converted_id: Word,
    body: &mut Vec<Instruction>,
) {
    let widen = Instruction::unary(
        spirv::Op::FConvert,
        f32_type_id,
        converted_id,
        f16_value_id,
    );
    body.push(widen);
}

/// Appends an `OpFConvert` instruction to `body` that converts the value
/// `f32_value_id` to the type `f16_type_id`, producing `converted_id`.
///
/// The caller allocates `converted_id`; nothing else in `body` is touched.
pub fn emit_f32_to_f16_conversion(
    f32_value_id: Word,
    f16_type_id: Word,
    converted_id: Word,
    body: &mut Vec<Instruction>,
) {
    let narrow = Instruction::unary(
        spirv::Op::FConvert,
        f16_type_id,
        converted_id,
        f32_value_id,
    );
    body.push(narrow);
}

pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option<LocalType> {
use crate::{ScalarKind as Sk, TypeInner};

match *ty_inner {
TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => {
Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32)))
}
TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => {
Some(LocalType::Numeric(NumericType::Vector {
size,
scalar: crate::Scalar::F32,
}))
}
_ => None,
}
}
}

impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
    /// Recycles the variable table through its own `recycle` implementation
    /// and carries the `use_native` flag over unchanged.
    fn recycle(self) -> Self {
        let Self {
            use_native,
            variable_map,
        } = self;

        Self {
            use_native,
            variable_map: variable_map.recycle(),
        }
    }
}
11 changes: 11 additions & 0 deletions naga/src/back/spv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation).
*/

mod block;
mod f16_polyfill;
mod helpers;
mod image;
mod index;
Expand Down Expand Up @@ -744,6 +745,7 @@ pub struct Writer {
bounds_check_policies: BoundsCheckPolicies,
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode,
force_loop_bounding: bool,
use_storage_input_output_16: bool,
void_type: Word,
//TODO: convert most of these into vectors, addressable by handle indices
lookup_type: crate::FastHashMap<LookupType, Word>,
Expand All @@ -770,6 +772,10 @@ pub struct Writer {

ray_get_committed_intersection_function: Option<Word>,
ray_get_candidate_intersection_function: Option<Word>,

/// F16 I/O polyfill manager for handling f16 input/output variables
/// when StorageInputOutput16 capability is not available.
io_f16_polyfills: f16_polyfill::F16IoPolyfill,
}

bitflags::bitflags! {
Expand Down Expand Up @@ -852,6 +858,10 @@ pub struct Options<'a> {
/// to think the number of iterations is bounded.
pub force_loop_bounding: bool,

/// Whether to use the StorageInputOutput16 capability for f16 shader I/O.
/// When false, f16 I/O is polyfilled using f32 types with conversions.
pub use_storage_input_output_16: bool,

pub debug_info: Option<DebugInfo<'a>>,
}

Expand All @@ -871,6 +881,7 @@ impl Default for Options<'_> {
bounds_check_policies: BoundsCheckPolicies::default(),
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill,
force_loop_bounding: true,
use_storage_input_output_16: true,
debug_info: None,
}
}
Expand Down
Loading