diff --git a/CHANGELOG.md b/CHANGELOG.md index ed056032c2f..11fb072dcab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -104,6 +104,7 @@ This allows using precompiled shaders without manually checking which backend's - Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950). - If the shader source contains control characters, Naga now replaces them with U+FFFD ("replacement character") in diagnostic output. By @andyleiserson in [#8049](https://github.com/gfx-rs/wgpu/pull/8049). +- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884). #### DX12 diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 0cd414bfbeb..7758d86c414 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -237,7 +237,7 @@ impl Writer { } }; - body.push(Instruction::store(res_member.id, member_value_id, None)); + self.store_io_with_f16_polyfill(body, res_member.id, member_value_id); match res_member.built_in { Some(crate::BuiltIn::Position { .. }) diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs new file mode 100644 index 00000000000..824490265af --- /dev/null +++ b/naga/src/back/spv/f16_polyfill.rs @@ -0,0 +1,104 @@ +/*! +This module provides functionality for polyfilling `f16` input/output variables +when the `StorageInputOutput16` capability is not available or disabled. + +It works by: + +1. Declaring `f16` I/O variables as `f32` in SPIR-V +2. Converting between `f16` and `f32` at runtime using `OpFConvert` +3. Maintaining mappings to track which variables need conversion +*/ + +use crate::back::spv::{Instruction, LocalType, NumericType, Word}; +use alloc::vec::Vec; + +/// Manages `f16` I/O polyfill state and operations. 
+#[derive(Default)] +pub(in crate::back::spv) struct F16IoPolyfill { + use_native: bool, + io_var_to_f32_type: crate::FastHashMap, +} + +impl F16IoPolyfill { + pub fn new(use_storage_input_output_16: bool) -> Self { + Self { + use_native: use_storage_input_output_16, + io_var_to_f32_type: crate::FastHashMap::default(), + } + } + + pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool { + use crate::{ScalarKind as Sk, TypeInner}; + + !self.use_native + && match *ty_inner { + TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true, + TypeInner::Vector { scalar, .. } + if scalar.kind == Sk::Float && scalar.width == 2 => + { + true + } + _ => false, + } + } + + pub fn register_io_var(&mut self, variable_id: Word, f32_type_id: Word) { + self.io_var_to_f32_type.insert(variable_id, f32_type_id); + } + + pub fn get_f32_io_type(&self, variable_id: Word) -> Option { + self.io_var_to_f32_type.get(&variable_id).copied() + } + + pub fn emit_f16_to_f32_conversion( + f16_value_id: Word, + f32_type_id: Word, + converted_id: Word, + body: &mut Vec, + ) { + body.push(Instruction::unary( + spirv::Op::FConvert, + f32_type_id, + converted_id, + f16_value_id, + )); + } + + pub fn emit_f32_to_f16_conversion( + f32_value_id: Word, + f16_type_id: Word, + converted_id: Word, + body: &mut Vec, + ) { + body.push(Instruction::unary( + spirv::Op::FConvert, + f16_type_id, + converted_id, + f32_value_id, + )); + } + + pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option { + use crate::{ScalarKind as Sk, TypeInner}; + + match *ty_inner { + TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => { + Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32))) + } + TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => { + Some(LocalType::Numeric(NumericType::Vector { + size, + scalar: crate::Scalar::F32, + })) + } + _ => None, + } + } +} + +impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill { + 
fn recycle(mut self) -> Self { + self.io_var_to_f32_type = self.io_var_to_f32_type.recycle(); + self + } +} diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 21c00015478..ab3abe95515 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation). */ mod block; +mod f16_polyfill; mod helpers; mod image; mod index; @@ -745,6 +746,7 @@ pub struct Writer { bounds_check_policies: BoundsCheckPolicies, zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode, force_loop_bounding: bool, + use_storage_input_output_16: bool, void_type: Word, //TODO: convert most of these into vectors, addressable by handle indices lookup_type: crate::FastHashMap, @@ -771,6 +773,10 @@ pub struct Writer { ray_get_committed_intersection_function: Option, ray_get_candidate_intersection_function: Option, + + /// F16 I/O polyfill manager for handling `f16` input/output variables + /// when `StorageInputOutput16` capability is not available. + io_f16_polyfills: f16_polyfill::F16IoPolyfill, } bitflags::bitflags! { @@ -853,6 +859,10 @@ pub struct Options<'a> { /// to think the number of iterations is bounded. pub force_loop_bounding: bool, + /// Whether to use the `StorageInputOutput16` capability for `f16` shader I/O. + /// When false, `f16` I/O is polyfilled using `f32` types with conversions. 
+ pub use_storage_input_output_16: bool, + pub debug_info: Option>, } @@ -872,6 +882,7 @@ impl Default for Options<'_> { bounds_check_policies: BoundsCheckPolicies::default(), zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill, force_loop_bounding: true, + use_storage_input_output_16: true, debug_info: None, } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 0688eb6c975..48d13f03c25 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -78,6 +78,7 @@ impl Writer { bounds_check_policies: options.bounds_check_policies, zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory, force_loop_bounding: options.force_loop_bounding, + use_storage_input_output_16: options.use_storage_input_output_16, void_type, lookup_type: crate::FastHashMap::default(), lookup_function: crate::FastHashMap::default(), @@ -92,6 +93,9 @@ impl Writer { temp_list: Vec::new(), ray_get_committed_intersection_function: None, ray_get_candidate_intersection_function: None, + io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new( + options.use_storage_input_output_16, + ), }) } @@ -125,6 +129,7 @@ impl Writer { bounds_check_policies: self.bounds_check_policies, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, force_loop_bounding: self.force_loop_bounding, + use_storage_input_output_16: self.use_storage_input_output_16, capabilities_available: take(&mut self.capabilities_available), binding_map: take(&mut self.binding_map), @@ -151,6 +156,7 @@ impl Writer { temp_list: take(&mut self.temp_list).recycle(), ray_get_candidate_intersection_function: None, ray_get_committed_intersection_function: None, + io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(), }; *self = fresh; @@ -726,10 +732,11 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let id = self.id_gen.next(); - prelude - .body - .push(Instruction::load(argument_type_id, id, varying_id, None)); + 
let id = self.load_io_with_f16_polyfill( + &mut prelude.body, + varying_id, + argument_type_id, + ); if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { local_invocation_id = Some(id); @@ -754,10 +761,8 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let id = self.id_gen.next(); - prelude - .body - .push(Instruction::load(type_id, id, varying_id, None)); + let id = + self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id); constituent_ids.push(id); if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { @@ -1220,8 +1225,10 @@ impl Writer { .insert(spirv::Capability::StorageBuffer16BitAccess); self.capabilities_used .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess); - self.capabilities_used - .insert(spirv::Capability::StorageInputOutput16); + if self.use_storage_input_output_16 { + self.capabilities_used + .insert(spirv::Capability::StorageInputOutput16); + } } Instruction::type_float(id, bits) } @@ -1905,8 +1912,26 @@ impl Writer { ty: Handle, binding: &crate::Binding, ) -> Result { + use crate::TypeInner; + let id = self.id_gen.next(); - let pointer_type_id = self.get_handle_pointer_type_id(ty, class); + let ty_inner = &ir_module.types[ty].inner; + let needs_polyfill = self.needs_f16_polyfill(ty_inner); + + let pointer_type_id = if needs_polyfill { + let f32_value_local = + super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) + .expect("needs_polyfill returned true but create_polyfill_type returned None"); + + let f32_type_id = self.get_localtype_id(f32_value_local); + let ptr_id = self.get_pointer_type_id(f32_type_id, class); + self.io_f16_polyfills.register_io_var(id, f32_type_id); + + ptr_id + } else { + self.get_handle_pointer_type_id(ty, class) + }; + Instruction::variable(pointer_type_id, id, class, None) .to_words(&mut self.logical_layout.declarations); @@ -2089,8 +2114,9 @@ impl Writer { // > shader, must be decorated Flat if class == 
spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment { let is_flat = match ir_module.types[ty].inner { - crate::TypeInner::Scalar(scalar) - | crate::TypeInner::Vector { scalar, .. } => match scalar.kind { + TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar + .kind + { Sk::Uint | Sk::Sint | Sk::Bool => true, Sk::Float => false, Sk::AbstractInt | Sk::AbstractFloat => { @@ -2112,6 +2138,49 @@ impl Writer { Ok(id) } + /// Load an IO variable, converting from `f32` to `f16` if polyfill is active. + /// Returns the id of the loaded value matching `target_type_id`. + pub(super) fn load_io_with_f16_polyfill( + &mut self, + body: &mut Vec, + varying_id: Word, + target_type_id: Word, + ) -> Word { + let tmp = self.id_gen.next(); + if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) { + body.push(Instruction::load(f32_ty, tmp, varying_id, None)); + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + tmp, + target_type_id, + converted, + body, + ); + converted + } else { + body.push(Instruction::load(target_type_id, tmp, varying_id, None)); + tmp + } + } + + /// Store an IO variable, converting from `f16` to `f32` if polyfill is active. 
+ pub(super) fn store_io_with_f16_polyfill( + &mut self, + body: &mut Vec, + varying_id: Word, + value_id: Word, + ) { + if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) { + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( + value_id, f32_ty, converted, body, + ); + body.push(Instruction::store(varying_id, converted, None)); + } else { + body.push(Instruction::store(varying_id, value_id, None)); + } + } + fn write_global_variable( &mut self, ir_module: &crate::Module, @@ -2585,6 +2654,10 @@ impl Writer { self.decorate(id, spirv::Decoration::NonUniform, &[]); Ok(()) } + + pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool { + self.io_f16_polyfills.needs_polyfill(ty_inner) + } } #[test] diff --git a/naga/tests/in/wgsl/f16-native.toml b/naga/tests/in/wgsl/f16-native.toml new file mode 100644 index 00000000000..529d34f80da --- /dev/null +++ b/naga/tests/in/wgsl/f16-native.toml @@ -0,0 +1,13 @@ +targets = "SPIRV" +god_mode = true + +[spv] +debug = true +version = [1, 1] +use_storage_input_output_16 = true +capabilities = ["Float16"] + +[bounds_check_policies] +index = "ReadZeroSkipWrite" +buffer = "ReadZeroSkipWrite" +image = "ReadZeroSkipWrite" diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl new file mode 100644 index 00000000000..fda726df765 --- /dev/null +++ b/naga/tests/in/wgsl/f16-native.wgsl @@ -0,0 +1,79 @@ +enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + 
vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +} \ No newline at end of file diff --git a/naga/tests/in/wgsl/f16-polyfill.toml b/naga/tests/in/wgsl/f16-polyfill.toml new file mode 100644 index 00000000000..96160063e05 --- /dev/null +++ 
b/naga/tests/in/wgsl/f16-polyfill.toml @@ -0,0 +1,13 @@ +targets = "SPIRV" +god_mode = true + +[spv] +debug = true +version = [1, 1] +use_storage_input_output_16 = false +capabilities = ["Float16"] + +[bounds_check_policies] +index = "ReadZeroSkipWrite" +buffer = "ReadZeroSkipWrite" +image = "ReadZeroSkipWrite" diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl new file mode 100644 index 00000000000..fda726df765 --- /dev/null +++ b/naga/tests/in/wgsl/f16-polyfill.wgsl @@ -0,0 +1,79 @@ +enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn 
test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +} \ No newline at end of file diff --git a/naga/tests/naga/snapshots.rs b/naga/tests/naga/snapshots.rs index e2288eee918..f08ca3b0f65 100644 --- a/naga/tests/naga/snapshots.rs +++ b/naga/tests/naga/snapshots.rs @@ -91,7 +91,7 @@ struct SpirvInParameters { adjust_coordinate_space: bool, } -#[derive(Default, serde::Deserialize)] +#[derive(serde::Deserialize)] #[serde(default)] struct SpirvOutParameters { version: SpvOutVersion, @@ -101,11 +101,29 @@ struct SpirvOutParameters { force_point_size: bool, clamp_frag_depth: bool, separate_entry_points: bool, + use_storage_input_output_16: bool, #[cfg(all(feature = "deserialize", spv_out))] #[serde(deserialize_with = "deserialize_binding_map")] binding_map: naga::back::spv::BindingMap, } +impl Default for SpirvOutParameters { + fn default() -> Self { + Self { + version: SpvOutVersion::default(), + capabilities: naga::FastHashSet::default(), + debug: false, + adjust_coordinate_space: false, + force_point_size: false, + clamp_frag_depth: false, + separate_entry_points: false, + use_storage_input_output_16: true, + #[cfg(all(feature = "deserialize", spv_out))] + binding_map: 
naga::back::spv::BindingMap::default(), + } + } +} + #[derive(Default, serde::Deserialize)] #[serde(default)] struct WgslOutParameters { @@ -617,6 +635,7 @@ fn write_output_spv( binding_map: params.binding_map.clone(), zero_initialize_workgroup_memory: spv::ZeroInitializeWorkgroupMemoryMode::Polyfill, force_loop_bounding: true, + use_storage_input_output_16: params.use_storage_input_output_16, debug_info, }; diff --git a/naga/tests/naga/spirv_capabilities.rs b/naga/tests/naga/spirv_capabilities.rs index 2d46e37f72d..6d0e8153b81 100644 --- a/naga/tests/naga/spirv_capabilities.rs +++ b/naga/tests/naga/spirv_capabilities.rs @@ -6,6 +6,9 @@ Test SPIR-V backend capability checks. use spirv::Capability as Ca; +#[cfg(spv_out)] +use rspirv::binary::Disassemble; + fn capabilities_used(source: &str) -> naga::FastIndexSet { use naga::back::spv; use naga::valid; @@ -213,3 +216,135 @@ fn int64() { fn float16() { require(&[Ca::Float16], "enable f16; fn f(x: f16) { }"); } + +#[test] +fn f16_io_capabilities() { + let source = r#" + enable f16; + + struct VertexOutput { + @location(0) color: vec3, + } + + @fragment + fn main(input: VertexOutput) -> @location(0) vec4 { + return vec4(input.color, f16(1.0)); + } + "#; + + use naga::back::spv; + use naga::valid; + + let module = naga::front::wgsl::parse_str(source).unwrap(); + let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()) + .validate(&module) + .unwrap(); + + // Test native path: use_storage_input_output_16 = true + let options_native = spv::Options { + use_storage_input_output_16: true, + ..Default::default() + }; + + let mut words_native = vec![]; + let mut writer_native = spv::Writer::new(&options_native).unwrap(); + writer_native + .write(&module, &info, None, &None, &mut words_native) + .unwrap(); + let caps_native = writer_native.get_capabilities_used(); + + // Should include `StorageInputOutput16` for native `f16` I/O + assert!(caps_native.contains(&Ca::StorageInputOutput16)); + + // 
Test polyfill path: use_storage_input_output_16 = false + let options_polyfill = spv::Options { + use_storage_input_output_16: false, + ..Default::default() + }; + + let mut words_polyfill = vec![]; + let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap(); + writer_polyfill + .write(&module, &info, None, &None, &mut words_polyfill) + .unwrap(); + let caps_polyfill = writer_polyfill.get_capabilities_used(); + + // Should not include `StorageInputOutput16` when polyfilled + assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); + + // But should still include the basic `f16` capabilities + assert!(caps_polyfill.contains(&Ca::Float16)); +} + +#[cfg(spv_out)] +#[test] +fn f16_io_polyfill_codegen() { + let source = r#" + enable f16; + + struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + } + + @fragment + fn main(input: F16IO) -> F16IO { + var output = input; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.vec2_f16.x = input.vec2_f16.y; + return output; + } + "#; + + use naga::{back::spv, valid}; + + let module = naga::front::wgsl::parse_str(source).unwrap(); + let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()) + .validate(&module) + .unwrap(); + + // Test Native Path + let options_native = spv::Options { + use_storage_input_output_16: true, + ..Default::default() + }; + let mut words_native = vec![]; + let mut writer_native = spv::Writer::new(&options_native).unwrap(); + writer_native + .write(&module, &info, None, &None, &mut words_native) + .unwrap(); + let caps_native = writer_native.get_capabilities_used(); + let dis_native = rspirv::dr::load_words(words_native).unwrap().disassemble(); + + // Native path must request the capability and must NOT have conversions. 
+ assert!(caps_native.contains(&Ca::StorageInputOutput16)); + assert!(!dis_native.contains("OpFConvert")); + + // Test Polyfill Path + let options_polyfill = spv::Options { + use_storage_input_output_16: false, + ..Default::default() + }; + let mut words_polyfill = vec![]; + let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap(); + writer_polyfill + .write(&module, &info, None, &None, &mut words_polyfill) + .unwrap(); + let caps_polyfill = writer_polyfill.get_capabilities_used(); + let dis_polyfill = rspirv::dr::load_words(words_polyfill) + .unwrap() + .disassemble(); + + // Polyfill path must NOT request the capability and must have conversions. + assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); + assert!(dis_polyfill.contains("OpFConvert")); + + // Should have 2 input conversions, and 2 output conversions + let fconvert_count = dis_polyfill.matches("OpFConvert").count(); + assert_eq!( + fconvert_count, 4, + "Expected 4 OpFConvert instructions for polyfilled I/O" + ); +} diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm new file mode 100644 index 00000000000..43210270933 --- /dev/null +++ b/naga/tests/out/spv/wgsl-f16-native.spvasm @@ -0,0 +1,761 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 318 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability StorageInputOutput16 +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40 %42 %44 %46 %48 %50 %52 +OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135 +OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198 +OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254 
%256 %258 %260 %262 %264 +OpEntryPoint Fragment %299 "test_component_access" %275 %277 %279 %281 %283 %285 %287 %289 %291 %292 %293 %294 %295 %296 %297 %298 +OpExecutionMode %54 OriginUpperLeft +OpExecutionMode %136 OriginUpperLeft +OpExecutionMode %199 OriginUpperLeft +OpExecutionMode %265 OriginUpperLeft +OpExecutionMode %299 OriginUpperLeft +%3 = OpString "f16-native.wgsl" +OpSource Unknown 0 %3 "enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 
1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +}" +OpMemberName %12 0 "scalar_f16" +OpMemberName %12 1 "scalar_f32" +OpMemberName %12 2 "vec2_f16" +OpMemberName %12 3 "vec2_f32" +OpMemberName %12 4 "vec3_f16" +OpMemberName %12 5 "vec3_f32" +OpMemberName %12 6 "vec4_f16" +OpMemberName %12 7 "vec4_f32" +OpName %12 "F16IO" +OpName %14 "scalar_f16" +OpName %17 "scalar_f32" +OpName %20 "vec2_f16" +OpName %23 "vec2_f32" +OpName %26 "vec3_f16" +OpName %29 "vec3_f32" +OpName %32 "vec4_f16" +OpName %35 "vec4_f32" +OpName %38 "scalar_f16" +OpName %40 "scalar_f32" +OpName %42 "vec2_f16" +OpName %44 "vec2_f32" +OpName %46 "vec3_f16" +OpName %48 "vec3_f32" +OpName %50 "vec4_f16" +OpName %52 "vec4_f32" +OpName %54 "test_direct" +OpName %64 "output" +OpName %112 "scalar_f16" +OpName %114 "scalar_f32" +OpName %116 "vec2_f16" +OpName %118 "vec2_f32" +OpName %120 "vec3_f16" +OpName %122 "vec3_f32" +OpName %124 "vec4_f16" +OpName %126 "vec4_f32" +OpName %128 "scalar_f16" +OpName %129 "scalar_f32" +OpName %130 "vec2_f16" +OpName %131 "vec2_f32" +OpName %132 "vec3_f16" +OpName %133 "vec3_f32" +OpName %134 "vec4_f16" +OpName %135 "vec4_f32" +OpName %136 "test_struct" +OpName %137 "output" +OpName %175 "scalar_f16" +OpName %177 "scalar_f32" +OpName %179 "vec2_f16" +OpName %181 "vec2_f32" +OpName %183 "vec3_f16" +OpName %185 
"vec3_f32" +OpName %187 "vec4_f16" +OpName %189 "vec4_f32" +OpName %191 "scalar_f16" +OpName %192 "scalar_f32" +OpName %193 "vec2_f16" +OpName %194 "vec2_f32" +OpName %195 "vec3_f16" +OpName %196 "vec3_f32" +OpName %197 "vec4_f16" +OpName %198 "vec4_f32" +OpName %199 "test_copy_input" +OpName %200 "input" +OpName %202 "output" +OpName %248 "scalar_f16" +OpName %250 "scalar_f32" +OpName %252 "vec2_f16" +OpName %254 "vec2_f32" +OpName %256 "vec3_f16" +OpName %258 "vec3_f32" +OpName %260 "vec4_f16" +OpName %262 "vec4_f32" +OpName %265 "test_return_partial" +OpName %267 "input" +OpName %275 "scalar_f16" +OpName %277 "scalar_f32" +OpName %279 "vec2_f16" +OpName %281 "vec2_f32" +OpName %283 "vec3_f16" +OpName %285 "vec3_f32" +OpName %287 "vec4_f16" +OpName %289 "vec4_f32" +OpName %291 "scalar_f16" +OpName %292 "scalar_f32" +OpName %293 "vec2_f16" +OpName %294 "vec2_f32" +OpName %295 "vec3_f16" +OpName %296 "vec3_f32" +OpName %297 "vec4_f16" +OpName %298 "vec4_f32" +OpName %299 "test_component_access" +OpName %300 "output" +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 16 +OpMemberDecorate %12 4 Offset 24 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %12 6 Offset 48 +OpMemberDecorate %12 7 Offset 64 +OpDecorate %14 Location 0 +OpDecorate %17 Location 1 +OpDecorate %20 Location 2 +OpDecorate %23 Location 3 +OpDecorate %26 Location 4 +OpDecorate %29 Location 5 +OpDecorate %32 Location 6 +OpDecorate %35 Location 7 +OpDecorate %38 Location 0 +OpDecorate %40 Location 1 +OpDecorate %42 Location 2 +OpDecorate %44 Location 3 +OpDecorate %46 Location 4 +OpDecorate %48 Location 5 +OpDecorate %50 Location 6 +OpDecorate %52 Location 7 +OpDecorate %112 Location 0 +OpDecorate %114 Location 1 +OpDecorate %116 Location 2 +OpDecorate %118 Location 3 +OpDecorate %120 Location 4 +OpDecorate %122 Location 5 +OpDecorate %124 Location 6 +OpDecorate %126 Location 7 +OpDecorate %128 Location 0 +OpDecorate 
%129 Location 1 +OpDecorate %130 Location 2 +OpDecorate %131 Location 3 +OpDecorate %132 Location 4 +OpDecorate %133 Location 5 +OpDecorate %134 Location 6 +OpDecorate %135 Location 7 +OpDecorate %175 Location 0 +OpDecorate %177 Location 1 +OpDecorate %179 Location 2 +OpDecorate %181 Location 3 +OpDecorate %183 Location 4 +OpDecorate %185 Location 5 +OpDecorate %187 Location 6 +OpDecorate %189 Location 7 +OpDecorate %191 Location 0 +OpDecorate %192 Location 1 +OpDecorate %193 Location 2 +OpDecorate %194 Location 3 +OpDecorate %195 Location 4 +OpDecorate %196 Location 5 +OpDecorate %197 Location 6 +OpDecorate %198 Location 7 +OpDecorate %248 Location 0 +OpDecorate %250 Location 1 +OpDecorate %252 Location 2 +OpDecorate %254 Location 3 +OpDecorate %256 Location 4 +OpDecorate %258 Location 5 +OpDecorate %260 Location 6 +OpDecorate %262 Location 7 +OpDecorate %264 Location 0 +OpDecorate %275 Location 0 +OpDecorate %277 Location 1 +OpDecorate %279 Location 2 +OpDecorate %281 Location 3 +OpDecorate %283 Location 4 +OpDecorate %285 Location 5 +OpDecorate %287 Location 6 +OpDecorate %289 Location 7 +OpDecorate %291 Location 0 +OpDecorate %292 Location 1 +OpDecorate %293 Location 2 +OpDecorate %294 Location 3 +OpDecorate %295 Location 4 +OpDecorate %296 Location 5 +OpDecorate %297 Location 6 +OpDecorate %298 Location 7 +%2 = OpTypeVoid +%4 = OpTypeFloat 16 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %4 2 +%7 = OpTypeVector %5 2 +%8 = OpTypeVector %4 3 +%9 = OpTypeVector %5 3 +%10 = OpTypeVector %4 4 +%11 = OpTypeVector %5 4 +%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11 +%15 = OpTypePointer Input %4 +%14 = OpVariable %15 Input +%18 = OpTypePointer Input %5 +%17 = OpVariable %18 Input +%21 = OpTypePointer Input %6 +%20 = OpVariable %21 Input +%24 = OpTypePointer Input %7 +%23 = OpVariable %24 Input +%27 = OpTypePointer Input %8 +%26 = OpVariable %27 Input +%30 = OpTypePointer Input %9 +%29 = OpVariable %30 Input +%33 = OpTypePointer Input %10 +%32 = OpVariable %33 Input +%36 = 
OpTypePointer Input %11 +%35 = OpVariable %36 Input +%39 = OpTypePointer Output %4 +%38 = OpVariable %39 Output +%41 = OpTypePointer Output %5 +%40 = OpVariable %41 Output +%43 = OpTypePointer Output %6 +%42 = OpVariable %43 Output +%45 = OpTypePointer Output %7 +%44 = OpVariable %45 Output +%47 = OpTypePointer Output %8 +%46 = OpVariable %47 Output +%49 = OpTypePointer Output %9 +%48 = OpVariable %49 Output +%51 = OpTypePointer Output %10 +%50 = OpVariable %51 Output +%53 = OpTypePointer Output %11 +%52 = OpVariable %53 Output +%55 = OpTypeFunction %2 +%56 = OpConstant %4 0.000000000000000000000000000000000000000021524 +%57 = OpConstant %5 1 +%58 = OpConstantComposite %6 %56 %56 +%59 = OpConstantComposite %7 %57 %57 +%60 = OpConstantComposite %8 %56 %56 %56 +%61 = OpConstantComposite %9 %57 %57 %57 +%62 = OpConstantComposite %10 %56 %56 %56 %56 +%63 = OpConstantComposite %11 %57 %57 %57 %57 +%65 = OpTypePointer Function %12 +%66 = OpConstantNull %12 +%68 = OpTypePointer Function %4 +%71 = OpTypeInt 32 0 +%70 = OpConstant %71 0 +%73 = OpTypePointer Function %5 +%75 = OpConstant %71 1 +%77 = OpTypePointer Function %6 +%79 = OpConstant %71 2 +%81 = OpTypePointer Function %7 +%83 = OpConstant %71 3 +%85 = OpTypePointer Function %8 +%87 = OpConstant %71 4 +%89 = OpTypePointer Function %9 +%91 = OpConstant %71 5 +%93 = OpTypePointer Function %10 +%95 = OpConstant %71 6 +%97 = OpTypePointer Function %11 +%99 = OpConstant %71 7 +%112 = OpVariable %15 Input +%114 = OpVariable %18 Input +%116 = OpVariable %21 Input +%118 = OpVariable %24 Input +%120 = OpVariable %27 Input +%122 = OpVariable %30 Input +%124 = OpVariable %33 Input +%126 = OpVariable %36 Input +%128 = OpVariable %39 Output +%129 = OpVariable %41 Output +%130 = OpVariable %43 Output +%131 = OpVariable %45 Output +%132 = OpVariable %47 Output +%133 = OpVariable %49 Output +%134 = OpVariable %51 Output +%135 = OpVariable %53 Output +%138 = OpConstantNull %12 +%175 = OpVariable %15 Input +%177 = OpVariable %18 
Input +%179 = OpVariable %21 Input +%181 = OpVariable %24 Input +%183 = OpVariable %27 Input +%185 = OpVariable %30 Input +%187 = OpVariable %33 Input +%189 = OpVariable %36 Input +%191 = OpVariable %39 Output +%192 = OpVariable %41 Output +%193 = OpVariable %43 Output +%194 = OpVariable %45 Output +%195 = OpVariable %47 Output +%196 = OpVariable %49 Output +%197 = OpVariable %51 Output +%198 = OpVariable %53 Output +%201 = OpConstantNull %12 +%203 = OpConstantNull %12 +%248 = OpVariable %15 Input +%250 = OpVariable %18 Input +%252 = OpVariable %21 Input +%254 = OpVariable %24 Input +%256 = OpVariable %27 Input +%258 = OpVariable %30 Input +%260 = OpVariable %33 Input +%262 = OpVariable %36 Input +%264 = OpVariable %39 Output +%266 = OpConstant %4 0 +%268 = OpConstantNull %12 +%275 = OpVariable %15 Input +%277 = OpVariable %18 Input +%279 = OpVariable %21 Input +%281 = OpVariable %24 Input +%283 = OpVariable %27 Input +%285 = OpVariable %30 Input +%287 = OpVariable %33 Input +%289 = OpVariable %36 Input +%291 = OpVariable %39 Output +%292 = OpVariable %41 Output +%293 = OpVariable %43 Output +%294 = OpVariable %45 Output +%295 = OpVariable %47 Output +%296 = OpVariable %49 Output +%297 = OpVariable %51 Output +%298 = OpVariable %53 Output +%301 = OpConstantNull %12 +%54 = OpFunction %2 None %55 +%13 = OpLabel +%64 = OpVariable %65 Function %66 +%16 = OpLoad %4 %14 +%19 = OpLoad %5 %17 +%22 = OpLoad %6 %20 +%25 = OpLoad %7 %23 +%28 = OpLoad %8 %26 +%31 = OpLoad %9 %29 +%34 = OpLoad %10 %32 +%37 = OpLoad %11 %35 +OpBranch %67 +%67 = OpLabel +OpLine %3 15 5 +OpLine %3 15 25 +%69 = OpFAdd %4 %16 %56 +OpLine %3 15 5 +%72 = OpAccessChain %68 %64 %70 +OpStore %72 %69 +OpLine %3 16 5 +OpLine %3 16 25 +%74 = OpFAdd %5 %19 %57 +OpLine %3 16 5 +%76 = OpAccessChain %73 %64 %75 +OpStore %76 %74 +OpLine %3 17 5 +OpLine %3 17 23 +%78 = OpFAdd %6 %22 %58 +OpLine %3 17 5 +%80 = OpAccessChain %77 %64 %79 +OpStore %80 %78 +OpLine %3 18 5 +OpLine %3 18 34 +OpLine %3 18 23 +%82 = 
OpFAdd %7 %25 %59 +OpLine %3 18 5 +%84 = OpAccessChain %81 %64 %83 +OpStore %84 %82 +OpLine %3 19 5 +OpLine %3 19 23 +%86 = OpFAdd %8 %28 %60 +OpLine %3 19 5 +%88 = OpAccessChain %85 %64 %87 +OpStore %88 %86 +OpLine %3 20 5 +OpLine %3 20 34 +OpLine %3 20 23 +%90 = OpFAdd %9 %31 %61 +OpLine %3 20 5 +%92 = OpAccessChain %89 %64 %91 +OpStore %92 %90 +OpLine %3 21 5 +OpLine %3 21 23 +%94 = OpFAdd %10 %34 %62 +OpLine %3 21 5 +%96 = OpAccessChain %93 %64 %95 +OpStore %96 %94 +OpLine %3 22 5 +OpLine %3 22 34 +OpLine %3 22 23 +%98 = OpFAdd %11 %37 %63 +OpLine %3 22 5 +%100 = OpAccessChain %97 %64 %99 +OpStore %100 %98 +OpLine %3 1 1 +%101 = OpLoad %12 %64 +%102 = OpCompositeExtract %4 %101 0 +OpStore %38 %102 +%103 = OpCompositeExtract %5 %101 1 +OpStore %40 %103 +%104 = OpCompositeExtract %6 %101 2 +OpStore %42 %104 +%105 = OpCompositeExtract %7 %101 3 +OpStore %44 %105 +%106 = OpCompositeExtract %8 %101 4 +OpStore %46 %106 +%107 = OpCompositeExtract %9 %101 5 +OpStore %48 %107 +%108 = OpCompositeExtract %10 %101 6 +OpStore %50 %108 +%109 = OpCompositeExtract %11 %101 7 +OpStore %52 %109 +OpReturn +OpFunctionEnd +%136 = OpFunction %2 None %55 +%110 = OpLabel +%137 = OpVariable %65 Function %138 +%113 = OpLoad %4 %112 +%115 = OpLoad %5 %114 +%117 = OpLoad %6 %116 +%119 = OpLoad %7 %118 +%121 = OpLoad %8 %120 +%123 = OpLoad %9 %122 +%125 = OpLoad %10 %124 +%127 = OpLoad %11 %126 +%111 = OpCompositeConstruct %12 %113 %115 %117 %119 %121 %123 %125 %127 +OpBranch %139 +%139 = OpLabel +OpLine %3 40 5 +%140 = OpCompositeExtract %4 %111 0 +OpLine %3 40 25 +%141 = OpFAdd %4 %140 %56 +OpLine %3 40 5 +%142 = OpAccessChain %68 %137 %70 +OpStore %142 %141 +OpLine %3 41 5 +%143 = OpCompositeExtract %5 %111 1 +OpLine %3 41 25 +%144 = OpFAdd %5 %143 %57 +OpLine %3 41 5 +%145 = OpAccessChain %73 %137 %75 +OpStore %145 %144 +OpLine %3 42 5 +%146 = OpCompositeExtract %6 %111 2 +OpLine %3 42 23 +%147 = OpFAdd %6 %146 %58 +OpLine %3 42 5 +%148 = OpAccessChain %77 %137 %79 +OpStore %148 %147 
+OpLine %3 43 5 +%149 = OpCompositeExtract %7 %111 3 +OpLine %3 43 40 +OpLine %3 43 23 +%150 = OpFAdd %7 %149 %59 +OpLine %3 43 5 +%151 = OpAccessChain %81 %137 %83 +OpStore %151 %150 +OpLine %3 44 5 +%152 = OpCompositeExtract %8 %111 4 +OpLine %3 44 23 +%153 = OpFAdd %8 %152 %60 +OpLine %3 44 5 +%154 = OpAccessChain %85 %137 %87 +OpStore %154 %153 +OpLine %3 45 5 +%155 = OpCompositeExtract %9 %111 5 +OpLine %3 45 40 +OpLine %3 45 23 +%156 = OpFAdd %9 %155 %61 +OpLine %3 45 5 +%157 = OpAccessChain %89 %137 %91 +OpStore %157 %156 +OpLine %3 46 5 +%158 = OpCompositeExtract %10 %111 6 +OpLine %3 46 23 +%159 = OpFAdd %10 %158 %62 +OpLine %3 46 5 +%160 = OpAccessChain %93 %137 %95 +OpStore %160 %159 +OpLine %3 47 5 +%161 = OpCompositeExtract %11 %111 7 +OpLine %3 47 40 +OpLine %3 47 23 +%162 = OpFAdd %11 %161 %63 +OpLine %3 47 5 +%163 = OpAccessChain %97 %137 %99 +OpStore %163 %162 +OpLine %3 1 1 +%164 = OpLoad %12 %137 +%165 = OpCompositeExtract %4 %164 0 +OpStore %128 %165 +%166 = OpCompositeExtract %5 %164 1 +OpStore %129 %166 +%167 = OpCompositeExtract %6 %164 2 +OpStore %130 %167 +%168 = OpCompositeExtract %7 %164 3 +OpStore %131 %168 +%169 = OpCompositeExtract %8 %164 4 +OpStore %132 %169 +%170 = OpCompositeExtract %9 %164 5 +OpStore %133 %170 +%171 = OpCompositeExtract %10 %164 6 +OpStore %134 %171 +%172 = OpCompositeExtract %11 %164 7 +OpStore %135 %172 +OpReturn +OpFunctionEnd +%199 = OpFunction %2 None %55 +%173 = OpLabel +%200 = OpVariable %65 Function %201 +%202 = OpVariable %65 Function %203 +%176 = OpLoad %4 %175 +%178 = OpLoad %5 %177 +%180 = OpLoad %6 %179 +%182 = OpLoad %7 %181 +%184 = OpLoad %8 %183 +%186 = OpLoad %9 %185 +%188 = OpLoad %10 %187 +%190 = OpLoad %11 %189 +%174 = OpCompositeConstruct %12 %176 %178 %180 %182 %184 %186 %188 %190 +OpBranch %204 +%204 = OpLabel +OpLine %3 53 5 +OpStore %200 %174 +OpLine %3 55 5 +%205 = OpAccessChain %68 %200 %70 +%206 = OpLoad %4 %205 +OpLine %3 55 25 +%207 = OpFAdd %4 %206 %56 +OpLine %3 55 5 +%208 = 
OpAccessChain %68 %202 %70 +OpStore %208 %207 +OpLine %3 56 5 +%209 = OpAccessChain %73 %200 %75 +%210 = OpLoad %5 %209 +OpLine %3 56 25 +%211 = OpFAdd %5 %210 %57 +OpLine %3 56 5 +%212 = OpAccessChain %73 %202 %75 +OpStore %212 %211 +OpLine %3 57 5 +%213 = OpAccessChain %77 %200 %79 +%214 = OpLoad %6 %213 +OpLine %3 57 23 +%215 = OpFAdd %6 %214 %58 +OpLine %3 57 5 +%216 = OpAccessChain %77 %202 %79 +OpStore %216 %215 +OpLine %3 58 5 +%217 = OpAccessChain %81 %200 %83 +%218 = OpLoad %7 %217 +OpLine %3 58 40 +OpLine %3 58 23 +%219 = OpFAdd %7 %218 %59 +OpLine %3 58 5 +%220 = OpAccessChain %81 %202 %83 +OpStore %220 %219 +OpLine %3 59 5 +%221 = OpAccessChain %85 %200 %87 +%222 = OpLoad %8 %221 +OpLine %3 59 23 +%223 = OpFAdd %8 %222 %60 +OpLine %3 59 5 +%224 = OpAccessChain %85 %202 %87 +OpStore %224 %223 +OpLine %3 60 5 +%225 = OpAccessChain %89 %200 %91 +%226 = OpLoad %9 %225 +OpLine %3 60 40 +OpLine %3 60 23 +%227 = OpFAdd %9 %226 %61 +OpLine %3 60 5 +%228 = OpAccessChain %89 %202 %91 +OpStore %228 %227 +OpLine %3 61 5 +%229 = OpAccessChain %93 %200 %95 +%230 = OpLoad %10 %229 +OpLine %3 61 23 +%231 = OpFAdd %10 %230 %62 +OpLine %3 61 5 +%232 = OpAccessChain %93 %202 %95 +OpStore %232 %231 +OpLine %3 62 5 +%233 = OpAccessChain %97 %200 %99 +%234 = OpLoad %11 %233 +OpLine %3 62 40 +OpLine %3 62 23 +%235 = OpFAdd %11 %234 %63 +OpLine %3 62 5 +%236 = OpAccessChain %97 %202 %99 +OpStore %236 %235 +OpLine %3 1 1 +%237 = OpLoad %12 %202 +%238 = OpCompositeExtract %4 %237 0 +OpStore %191 %238 +%239 = OpCompositeExtract %5 %237 1 +OpStore %192 %239 +%240 = OpCompositeExtract %6 %237 2 +OpStore %193 %240 +%241 = OpCompositeExtract %7 %237 3 +OpStore %194 %241 +%242 = OpCompositeExtract %8 %237 4 +OpStore %195 %242 +%243 = OpCompositeExtract %9 %237 5 +OpStore %196 %243 +%244 = OpCompositeExtract %10 %237 6 +OpStore %197 %244 +%245 = OpCompositeExtract %11 %237 7 +OpStore %198 %245 +OpReturn +OpFunctionEnd +%265 = OpFunction %2 None %55 +%246 = OpLabel +%267 = OpVariable 
%65 Function %268 +%249 = OpLoad %4 %248 +%251 = OpLoad %5 %250 +%253 = OpLoad %6 %252 +%255 = OpLoad %7 %254 +%257 = OpLoad %8 %256 +%259 = OpLoad %9 %258 +%261 = OpLoad %10 %260 +%263 = OpLoad %11 %262 +%247 = OpCompositeConstruct %12 %249 %251 %253 %255 %257 %259 %261 %263 +OpBranch %269 +%269 = OpLabel +OpLine %3 68 5 +OpStore %267 %247 +OpLine %3 69 5 +OpLine %3 69 5 +%270 = OpAccessChain %68 %267 %70 +OpStore %270 %266 +OpLine %3 70 12 +%271 = OpAccessChain %68 %267 %70 +%272 = OpLoad %4 %271 +OpStore %264 %272 +OpReturn +OpFunctionEnd +%299 = OpFunction %2 None %55 +%273 = OpLabel +%300 = OpVariable %65 Function %301 +%276 = OpLoad %4 %275 +%278 = OpLoad %5 %277 +%280 = OpLoad %6 %279 +%282 = OpLoad %7 %281 +%284 = OpLoad %8 %283 +%286 = OpLoad %9 %285 +%288 = OpLoad %10 %287 +%290 = OpLoad %11 %289 +%274 = OpCompositeConstruct %12 %276 %278 %280 %282 %284 %286 %288 %290 +OpBranch %302 +%302 = OpLabel +OpLine %3 76 5 +%303 = OpCompositeExtract %6 %274 2 +%304 = OpCompositeExtract %4 %303 1 +OpLine %3 76 5 +%305 = OpAccessChain %68 %300 %79 %70 +OpStore %305 %304 +OpLine %3 77 5 +%306 = OpCompositeExtract %6 %274 2 +%307 = OpCompositeExtract %4 %306 0 +OpLine %3 77 5 +%308 = OpAccessChain %68 %300 %79 %75 +OpStore %308 %307 +OpLine %3 1 1 +%309 = OpLoad %12 %300 +%310 = OpCompositeExtract %4 %309 0 +OpStore %291 %310 +%311 = OpCompositeExtract %5 %309 1 +OpStore %292 %311 +%312 = OpCompositeExtract %6 %309 2 +OpStore %293 %312 +%313 = OpCompositeExtract %7 %309 3 +OpStore %294 %313 +%314 = OpCompositeExtract %8 %309 4 +OpStore %295 %314 +%315 = OpCompositeExtract %9 %309 5 +OpStore %296 %315 +%316 = OpCompositeExtract %10 %309 6 +OpStore %297 %316 +%317 = OpCompositeExtract %11 %309 7 +OpStore %298 %317 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm new file mode 100644 index 00000000000..d673816a486 --- /dev/null +++ 
b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm @@ -0,0 +1,789 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 347 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40 %41 %43 %44 %46 %47 %49 +OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139 +OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210 +OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284 +OpEntryPoint Fragment %324 "test_component_access" %296 %299 %301 %304 %306 %309 %311 %314 %316 %317 %318 %319 %320 %321 %322 %323 +OpExecutionMode %50 OriginUpperLeft +OpExecutionMode %140 OriginUpperLeft +OpExecutionMode %211 OriginUpperLeft +OpExecutionMode %285 OriginUpperLeft +OpExecutionMode %324 OriginUpperLeft +%3 = OpString "f16-polyfill.wgsl" +OpSource Unknown 0 %3 "enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) 
vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +}" +OpMemberName %12 0 "scalar_f16" +OpMemberName %12 1 "scalar_f32" +OpMemberName %12 2 "vec2_f16" +OpMemberName %12 3 "vec2_f32" +OpMemberName %12 4 "vec3_f16" +OpMemberName %12 5 "vec3_f32" +OpMemberName %12 6 "vec4_f16" +OpMemberName %12 7 "vec4_f32" +OpName %12 "F16IO" +OpName %14 "scalar_f16" +OpName %18 "scalar_f32" +OpName %20 "vec2_f16" +OpName %24 "vec2_f32" +OpName %26 "vec3_f16" +OpName %30 "vec3_f32" +OpName %32 "vec4_f16" +OpName %36 "vec4_f32" +OpName %38 "scalar_f16" +OpName %40 
"scalar_f32" +OpName %41 "vec2_f16" +OpName %43 "vec2_f32" +OpName %44 "vec3_f16" +OpName %46 "vec3_f32" +OpName %47 "vec4_f16" +OpName %49 "vec4_f32" +OpName %50 "test_direct" +OpName %60 "output" +OpName %112 "scalar_f16" +OpName %115 "scalar_f32" +OpName %117 "vec2_f16" +OpName %120 "vec2_f32" +OpName %122 "vec3_f16" +OpName %125 "vec3_f32" +OpName %127 "vec4_f16" +OpName %130 "vec4_f32" +OpName %132 "scalar_f16" +OpName %133 "scalar_f32" +OpName %134 "vec2_f16" +OpName %135 "vec2_f32" +OpName %136 "vec3_f16" +OpName %137 "vec3_f32" +OpName %138 "vec4_f16" +OpName %139 "vec4_f32" +OpName %140 "test_struct" +OpName %141 "output" +OpName %183 "scalar_f16" +OpName %186 "scalar_f32" +OpName %188 "vec2_f16" +OpName %191 "vec2_f32" +OpName %193 "vec3_f16" +OpName %196 "vec3_f32" +OpName %198 "vec4_f16" +OpName %201 "vec4_f32" +OpName %203 "scalar_f16" +OpName %204 "scalar_f32" +OpName %205 "vec2_f16" +OpName %206 "vec2_f32" +OpName %207 "vec3_f16" +OpName %208 "vec3_f32" +OpName %209 "vec4_f16" +OpName %210 "vec4_f32" +OpName %211 "test_copy_input" +OpName %212 "input" +OpName %214 "output" +OpName %264 "scalar_f16" +OpName %267 "scalar_f32" +OpName %269 "vec2_f16" +OpName %272 "vec2_f32" +OpName %274 "vec3_f16" +OpName %277 "vec3_f32" +OpName %279 "vec4_f16" +OpName %282 "vec4_f32" +OpName %285 "test_return_partial" +OpName %287 "input" +OpName %296 "scalar_f16" +OpName %299 "scalar_f32" +OpName %301 "vec2_f16" +OpName %304 "vec2_f32" +OpName %306 "vec3_f16" +OpName %309 "vec3_f32" +OpName %311 "vec4_f16" +OpName %314 "vec4_f32" +OpName %316 "scalar_f16" +OpName %317 "scalar_f32" +OpName %318 "vec2_f16" +OpName %319 "vec2_f32" +OpName %320 "vec3_f16" +OpName %321 "vec3_f32" +OpName %322 "vec4_f16" +OpName %323 "vec4_f32" +OpName %324 "test_component_access" +OpName %325 "output" +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 16 +OpMemberDecorate %12 4 Offset 24 +OpMemberDecorate %12 5 
Offset 32 +OpMemberDecorate %12 6 Offset 48 +OpMemberDecorate %12 7 Offset 64 +OpDecorate %14 Location 0 +OpDecorate %18 Location 1 +OpDecorate %20 Location 2 +OpDecorate %24 Location 3 +OpDecorate %26 Location 4 +OpDecorate %30 Location 5 +OpDecorate %32 Location 6 +OpDecorate %36 Location 7 +OpDecorate %38 Location 0 +OpDecorate %40 Location 1 +OpDecorate %41 Location 2 +OpDecorate %43 Location 3 +OpDecorate %44 Location 4 +OpDecorate %46 Location 5 +OpDecorate %47 Location 6 +OpDecorate %49 Location 7 +OpDecorate %112 Location 0 +OpDecorate %115 Location 1 +OpDecorate %117 Location 2 +OpDecorate %120 Location 3 +OpDecorate %122 Location 4 +OpDecorate %125 Location 5 +OpDecorate %127 Location 6 +OpDecorate %130 Location 7 +OpDecorate %132 Location 0 +OpDecorate %133 Location 1 +OpDecorate %134 Location 2 +OpDecorate %135 Location 3 +OpDecorate %136 Location 4 +OpDecorate %137 Location 5 +OpDecorate %138 Location 6 +OpDecorate %139 Location 7 +OpDecorate %183 Location 0 +OpDecorate %186 Location 1 +OpDecorate %188 Location 2 +OpDecorate %191 Location 3 +OpDecorate %193 Location 4 +OpDecorate %196 Location 5 +OpDecorate %198 Location 6 +OpDecorate %201 Location 7 +OpDecorate %203 Location 0 +OpDecorate %204 Location 1 +OpDecorate %205 Location 2 +OpDecorate %206 Location 3 +OpDecorate %207 Location 4 +OpDecorate %208 Location 5 +OpDecorate %209 Location 6 +OpDecorate %210 Location 7 +OpDecorate %264 Location 0 +OpDecorate %267 Location 1 +OpDecorate %269 Location 2 +OpDecorate %272 Location 3 +OpDecorate %274 Location 4 +OpDecorate %277 Location 5 +OpDecorate %279 Location 6 +OpDecorate %282 Location 7 +OpDecorate %284 Location 0 +OpDecorate %296 Location 0 +OpDecorate %299 Location 1 +OpDecorate %301 Location 2 +OpDecorate %304 Location 3 +OpDecorate %306 Location 4 +OpDecorate %309 Location 5 +OpDecorate %311 Location 6 +OpDecorate %314 Location 7 +OpDecorate %316 Location 0 +OpDecorate %317 Location 1 +OpDecorate %318 Location 2 +OpDecorate %319 Location 3 
+OpDecorate %320 Location 4 +OpDecorate %321 Location 5 +OpDecorate %322 Location 6 +OpDecorate %323 Location 7 +%2 = OpTypeVoid +%4 = OpTypeFloat 16 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %4 2 +%7 = OpTypeVector %5 2 +%8 = OpTypeVector %4 3 +%9 = OpTypeVector %5 3 +%10 = OpTypeVector %4 4 +%11 = OpTypeVector %5 4 +%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11 +%15 = OpTypePointer Input %5 +%14 = OpVariable %15 Input +%18 = OpVariable %15 Input +%21 = OpTypePointer Input %7 +%20 = OpVariable %21 Input +%24 = OpVariable %21 Input +%27 = OpTypePointer Input %9 +%26 = OpVariable %27 Input +%30 = OpVariable %27 Input +%33 = OpTypePointer Input %11 +%32 = OpVariable %33 Input +%36 = OpVariable %33 Input +%39 = OpTypePointer Output %5 +%38 = OpVariable %39 Output +%40 = OpVariable %39 Output +%42 = OpTypePointer Output %7 +%41 = OpVariable %42 Output +%43 = OpVariable %42 Output +%45 = OpTypePointer Output %9 +%44 = OpVariable %45 Output +%46 = OpVariable %45 Output +%48 = OpTypePointer Output %11 +%47 = OpVariable %48 Output +%49 = OpVariable %48 Output +%51 = OpTypeFunction %2 +%52 = OpConstant %4 0.000000000000000000000000000000000000000021524 +%53 = OpConstant %5 1 +%54 = OpConstantComposite %6 %52 %52 +%55 = OpConstantComposite %7 %53 %53 +%56 = OpConstantComposite %8 %52 %52 %52 +%57 = OpConstantComposite %9 %53 %53 %53 +%58 = OpConstantComposite %10 %52 %52 %52 %52 +%59 = OpConstantComposite %11 %53 %53 %53 %53 +%61 = OpTypePointer Function %12 +%62 = OpConstantNull %12 +%64 = OpTypePointer Function %4 +%67 = OpTypeInt 32 0 +%66 = OpConstant %67 0 +%69 = OpTypePointer Function %5 +%71 = OpConstant %67 1 +%73 = OpTypePointer Function %6 +%75 = OpConstant %67 2 +%77 = OpTypePointer Function %7 +%79 = OpConstant %67 3 +%81 = OpTypePointer Function %8 +%83 = OpConstant %67 4 +%85 = OpTypePointer Function %9 +%87 = OpConstant %67 5 +%89 = OpTypePointer Function %10 +%91 = OpConstant %67 6 +%93 = OpTypePointer Function %11 +%95 = OpConstant %67 7 +%112 = OpVariable %15 
Input +%115 = OpVariable %15 Input +%117 = OpVariable %21 Input +%120 = OpVariable %21 Input +%122 = OpVariable %27 Input +%125 = OpVariable %27 Input +%127 = OpVariable %33 Input +%130 = OpVariable %33 Input +%132 = OpVariable %39 Output +%133 = OpVariable %39 Output +%134 = OpVariable %42 Output +%135 = OpVariable %42 Output +%136 = OpVariable %45 Output +%137 = OpVariable %45 Output +%138 = OpVariable %48 Output +%139 = OpVariable %48 Output +%142 = OpConstantNull %12 +%183 = OpVariable %15 Input +%186 = OpVariable %15 Input +%188 = OpVariable %21 Input +%191 = OpVariable %21 Input +%193 = OpVariable %27 Input +%196 = OpVariable %27 Input +%198 = OpVariable %33 Input +%201 = OpVariable %33 Input +%203 = OpVariable %39 Output +%204 = OpVariable %39 Output +%205 = OpVariable %42 Output +%206 = OpVariable %42 Output +%207 = OpVariable %45 Output +%208 = OpVariable %45 Output +%209 = OpVariable %48 Output +%210 = OpVariable %48 Output +%213 = OpConstantNull %12 +%215 = OpConstantNull %12 +%264 = OpVariable %15 Input +%267 = OpVariable %15 Input +%269 = OpVariable %21 Input +%272 = OpVariable %21 Input +%274 = OpVariable %27 Input +%277 = OpVariable %27 Input +%279 = OpVariable %33 Input +%282 = OpVariable %33 Input +%284 = OpVariable %39 Output +%286 = OpConstant %4 0 +%288 = OpConstantNull %12 +%296 = OpVariable %15 Input +%299 = OpVariable %15 Input +%301 = OpVariable %21 Input +%304 = OpVariable %21 Input +%306 = OpVariable %27 Input +%309 = OpVariable %27 Input +%311 = OpVariable %33 Input +%314 = OpVariable %33 Input +%316 = OpVariable %39 Output +%317 = OpVariable %39 Output +%318 = OpVariable %42 Output +%319 = OpVariable %42 Output +%320 = OpVariable %45 Output +%321 = OpVariable %45 Output +%322 = OpVariable %48 Output +%323 = OpVariable %48 Output +%326 = OpConstantNull %12 +%50 = OpFunction %2 None %51 +%13 = OpLabel +%60 = OpVariable %61 Function %62 +%16 = OpLoad %5 %14 +%17 = OpFConvert %4 %16 +%19 = OpLoad %5 %18 +%22 = OpLoad %7 %20 +%23 = OpFConvert 
%6 %22 +%25 = OpLoad %7 %24 +%28 = OpLoad %9 %26 +%29 = OpFConvert %8 %28 +%31 = OpLoad %9 %30 +%34 = OpLoad %11 %32 +%35 = OpFConvert %10 %34 +%37 = OpLoad %11 %36 +OpBranch %63 +%63 = OpLabel +OpLine %3 15 5 +OpLine %3 15 25 +%65 = OpFAdd %4 %17 %52 +OpLine %3 15 5 +%68 = OpAccessChain %64 %60 %66 +OpStore %68 %65 +OpLine %3 16 5 +OpLine %3 16 25 +%70 = OpFAdd %5 %19 %53 +OpLine %3 16 5 +%72 = OpAccessChain %69 %60 %71 +OpStore %72 %70 +OpLine %3 17 5 +OpLine %3 17 23 +%74 = OpFAdd %6 %23 %54 +OpLine %3 17 5 +%76 = OpAccessChain %73 %60 %75 +OpStore %76 %74 +OpLine %3 18 5 +OpLine %3 18 34 +OpLine %3 18 23 +%78 = OpFAdd %7 %25 %55 +OpLine %3 18 5 +%80 = OpAccessChain %77 %60 %79 +OpStore %80 %78 +OpLine %3 19 5 +OpLine %3 19 23 +%82 = OpFAdd %8 %29 %56 +OpLine %3 19 5 +%84 = OpAccessChain %81 %60 %83 +OpStore %84 %82 +OpLine %3 20 5 +OpLine %3 20 34 +OpLine %3 20 23 +%86 = OpFAdd %9 %31 %57 +OpLine %3 20 5 +%88 = OpAccessChain %85 %60 %87 +OpStore %88 %86 +OpLine %3 21 5 +OpLine %3 21 23 +%90 = OpFAdd %10 %35 %58 +OpLine %3 21 5 +%92 = OpAccessChain %89 %60 %91 +OpStore %92 %90 +OpLine %3 22 5 +OpLine %3 22 34 +OpLine %3 22 23 +%94 = OpFAdd %11 %37 %59 +OpLine %3 22 5 +%96 = OpAccessChain %93 %60 %95 +OpStore %96 %94 +OpLine %3 1 1 +%97 = OpLoad %12 %60 +%98 = OpCompositeExtract %4 %97 0 +%99 = OpFConvert %5 %98 +OpStore %38 %99 +%100 = OpCompositeExtract %5 %97 1 +OpStore %40 %100 +%101 = OpCompositeExtract %6 %97 2 +%102 = OpFConvert %7 %101 +OpStore %41 %102 +%103 = OpCompositeExtract %7 %97 3 +OpStore %43 %103 +%104 = OpCompositeExtract %8 %97 4 +%105 = OpFConvert %9 %104 +OpStore %44 %105 +%106 = OpCompositeExtract %9 %97 5 +OpStore %46 %106 +%107 = OpCompositeExtract %10 %97 6 +%108 = OpFConvert %11 %107 +OpStore %47 %108 +%109 = OpCompositeExtract %11 %97 7 +OpStore %49 %109 +OpReturn +OpFunctionEnd +%140 = OpFunction %2 None %51 +%110 = OpLabel +%141 = OpVariable %61 Function %142 +%113 = OpLoad %5 %112 +%114 = OpFConvert %4 %113 +%116 = OpLoad %5 %115 
+%118 = OpLoad %7 %117 +%119 = OpFConvert %6 %118 +%121 = OpLoad %7 %120 +%123 = OpLoad %9 %122 +%124 = OpFConvert %8 %123 +%126 = OpLoad %9 %125 +%128 = OpLoad %11 %127 +%129 = OpFConvert %10 %128 +%131 = OpLoad %11 %130 +%111 = OpCompositeConstruct %12 %114 %116 %119 %121 %124 %126 %129 %131 +OpBranch %143 +%143 = OpLabel +OpLine %3 40 5 +%144 = OpCompositeExtract %4 %111 0 +OpLine %3 40 25 +%145 = OpFAdd %4 %144 %52 +OpLine %3 40 5 +%146 = OpAccessChain %64 %141 %66 +OpStore %146 %145 +OpLine %3 41 5 +%147 = OpCompositeExtract %5 %111 1 +OpLine %3 41 25 +%148 = OpFAdd %5 %147 %53 +OpLine %3 41 5 +%149 = OpAccessChain %69 %141 %71 +OpStore %149 %148 +OpLine %3 42 5 +%150 = OpCompositeExtract %6 %111 2 +OpLine %3 42 23 +%151 = OpFAdd %6 %150 %54 +OpLine %3 42 5 +%152 = OpAccessChain %73 %141 %75 +OpStore %152 %151 +OpLine %3 43 5 +%153 = OpCompositeExtract %7 %111 3 +OpLine %3 43 40 +OpLine %3 43 23 +%154 = OpFAdd %7 %153 %55 +OpLine %3 43 5 +%155 = OpAccessChain %77 %141 %79 +OpStore %155 %154 +OpLine %3 44 5 +%156 = OpCompositeExtract %8 %111 4 +OpLine %3 44 23 +%157 = OpFAdd %8 %156 %56 +OpLine %3 44 5 +%158 = OpAccessChain %81 %141 %83 +OpStore %158 %157 +OpLine %3 45 5 +%159 = OpCompositeExtract %9 %111 5 +OpLine %3 45 40 +OpLine %3 45 23 +%160 = OpFAdd %9 %159 %57 +OpLine %3 45 5 +%161 = OpAccessChain %85 %141 %87 +OpStore %161 %160 +OpLine %3 46 5 +%162 = OpCompositeExtract %10 %111 6 +OpLine %3 46 23 +%163 = OpFAdd %10 %162 %58 +OpLine %3 46 5 +%164 = OpAccessChain %89 %141 %91 +OpStore %164 %163 +OpLine %3 47 5 +%165 = OpCompositeExtract %11 %111 7 +OpLine %3 47 40 +OpLine %3 47 23 +%166 = OpFAdd %11 %165 %59 +OpLine %3 47 5 +%167 = OpAccessChain %93 %141 %95 +OpStore %167 %166 +OpLine %3 1 1 +%168 = OpLoad %12 %141 +%169 = OpCompositeExtract %4 %168 0 +%170 = OpFConvert %5 %169 +OpStore %132 %170 +%171 = OpCompositeExtract %5 %168 1 +OpStore %133 %171 +%172 = OpCompositeExtract %6 %168 2 +%173 = OpFConvert %7 %172 +OpStore %134 %173 +%174 = 
OpCompositeExtract %7 %168 3 +OpStore %135 %174 +%175 = OpCompositeExtract %8 %168 4 +%176 = OpFConvert %9 %175 +OpStore %136 %176 +%177 = OpCompositeExtract %9 %168 5 +OpStore %137 %177 +%178 = OpCompositeExtract %10 %168 6 +%179 = OpFConvert %11 %178 +OpStore %138 %179 +%180 = OpCompositeExtract %11 %168 7 +OpStore %139 %180 +OpReturn +OpFunctionEnd +%211 = OpFunction %2 None %51 +%181 = OpLabel +%212 = OpVariable %61 Function %213 +%214 = OpVariable %61 Function %215 +%184 = OpLoad %5 %183 +%185 = OpFConvert %4 %184 +%187 = OpLoad %5 %186 +%189 = OpLoad %7 %188 +%190 = OpFConvert %6 %189 +%192 = OpLoad %7 %191 +%194 = OpLoad %9 %193 +%195 = OpFConvert %8 %194 +%197 = OpLoad %9 %196 +%199 = OpLoad %11 %198 +%200 = OpFConvert %10 %199 +%202 = OpLoad %11 %201 +%182 = OpCompositeConstruct %12 %185 %187 %190 %192 %195 %197 %200 %202 +OpBranch %216 +%216 = OpLabel +OpLine %3 53 5 +OpStore %212 %182 +OpLine %3 55 5 +%217 = OpAccessChain %64 %212 %66 +%218 = OpLoad %4 %217 +OpLine %3 55 25 +%219 = OpFAdd %4 %218 %52 +OpLine %3 55 5 +%220 = OpAccessChain %64 %214 %66 +OpStore %220 %219 +OpLine %3 56 5 +%221 = OpAccessChain %69 %212 %71 +%222 = OpLoad %5 %221 +OpLine %3 56 25 +%223 = OpFAdd %5 %222 %53 +OpLine %3 56 5 +%224 = OpAccessChain %69 %214 %71 +OpStore %224 %223 +OpLine %3 57 5 +%225 = OpAccessChain %73 %212 %75 +%226 = OpLoad %6 %225 +OpLine %3 57 23 +%227 = OpFAdd %6 %226 %54 +OpLine %3 57 5 +%228 = OpAccessChain %73 %214 %75 +OpStore %228 %227 +OpLine %3 58 5 +%229 = OpAccessChain %77 %212 %79 +%230 = OpLoad %7 %229 +OpLine %3 58 40 +OpLine %3 58 23 +%231 = OpFAdd %7 %230 %55 +OpLine %3 58 5 +%232 = OpAccessChain %77 %214 %79 +OpStore %232 %231 +OpLine %3 59 5 +%233 = OpAccessChain %81 %212 %83 +%234 = OpLoad %8 %233 +OpLine %3 59 23 +%235 = OpFAdd %8 %234 %56 +OpLine %3 59 5 +%236 = OpAccessChain %81 %214 %83 +OpStore %236 %235 +OpLine %3 60 5 +%237 = OpAccessChain %85 %212 %87 +%238 = OpLoad %9 %237 +OpLine %3 60 40 +OpLine %3 60 23 +%239 = OpFAdd %9 %238 
%57 +OpLine %3 60 5 +%240 = OpAccessChain %85 %214 %87 +OpStore %240 %239 +OpLine %3 61 5 +%241 = OpAccessChain %89 %212 %91 +%242 = OpLoad %10 %241 +OpLine %3 61 23 +%243 = OpFAdd %10 %242 %58 +OpLine %3 61 5 +%244 = OpAccessChain %89 %214 %91 +OpStore %244 %243 +OpLine %3 62 5 +%245 = OpAccessChain %93 %212 %95 +%246 = OpLoad %11 %245 +OpLine %3 62 40 +OpLine %3 62 23 +%247 = OpFAdd %11 %246 %59 +OpLine %3 62 5 +%248 = OpAccessChain %93 %214 %95 +OpStore %248 %247 +OpLine %3 1 1 +%249 = OpLoad %12 %214 +%250 = OpCompositeExtract %4 %249 0 +%251 = OpFConvert %5 %250 +OpStore %203 %251 +%252 = OpCompositeExtract %5 %249 1 +OpStore %204 %252 +%253 = OpCompositeExtract %6 %249 2 +%254 = OpFConvert %7 %253 +OpStore %205 %254 +%255 = OpCompositeExtract %7 %249 3 +OpStore %206 %255 +%256 = OpCompositeExtract %8 %249 4 +%257 = OpFConvert %9 %256 +OpStore %207 %257 +%258 = OpCompositeExtract %9 %249 5 +OpStore %208 %258 +%259 = OpCompositeExtract %10 %249 6 +%260 = OpFConvert %11 %259 +OpStore %209 %260 +%261 = OpCompositeExtract %11 %249 7 +OpStore %210 %261 +OpReturn +OpFunctionEnd +%285 = OpFunction %2 None %51 +%262 = OpLabel +%287 = OpVariable %61 Function %288 +%265 = OpLoad %5 %264 +%266 = OpFConvert %4 %265 +%268 = OpLoad %5 %267 +%270 = OpLoad %7 %269 +%271 = OpFConvert %6 %270 +%273 = OpLoad %7 %272 +%275 = OpLoad %9 %274 +%276 = OpFConvert %8 %275 +%278 = OpLoad %9 %277 +%280 = OpLoad %11 %279 +%281 = OpFConvert %10 %280 +%283 = OpLoad %11 %282 +%263 = OpCompositeConstruct %12 %266 %268 %271 %273 %276 %278 %281 %283 +OpBranch %289 +%289 = OpLabel +OpLine %3 68 5 +OpStore %287 %263 +OpLine %3 69 5 +OpLine %3 69 5 +%290 = OpAccessChain %64 %287 %66 +OpStore %290 %286 +OpLine %3 70 12 +%291 = OpAccessChain %64 %287 %66 +%292 = OpLoad %4 %291 +%293 = OpFConvert %5 %292 +OpStore %284 %293 +OpReturn +OpFunctionEnd +%324 = OpFunction %2 None %51 +%294 = OpLabel +%325 = OpVariable %61 Function %326 +%297 = OpLoad %5 %296 +%298 = OpFConvert %4 %297 +%300 = OpLoad %5 
%299 +%302 = OpLoad %7 %301 +%303 = OpFConvert %6 %302 +%305 = OpLoad %7 %304 +%307 = OpLoad %9 %306 +%308 = OpFConvert %8 %307 +%310 = OpLoad %9 %309 +%312 = OpLoad %11 %311 +%313 = OpFConvert %10 %312 +%315 = OpLoad %11 %314 +%295 = OpCompositeConstruct %12 %298 %300 %303 %305 %308 %310 %313 %315 +OpBranch %327 +%327 = OpLabel +OpLine %3 76 5 +%328 = OpCompositeExtract %6 %295 2 +%329 = OpCompositeExtract %4 %328 1 +OpLine %3 76 5 +%330 = OpAccessChain %64 %325 %75 %66 +OpStore %330 %329 +OpLine %3 77 5 +%331 = OpCompositeExtract %6 %295 2 +%332 = OpCompositeExtract %4 %331 0 +OpLine %3 77 5 +%333 = OpAccessChain %64 %325 %75 %71 +OpStore %333 %332 +OpLine %3 1 1 +%334 = OpLoad %12 %325 +%335 = OpCompositeExtract %4 %334 0 +%336 = OpFConvert %5 %335 +OpStore %316 %336 +%337 = OpCompositeExtract %5 %334 1 +OpStore %317 %337 +%338 = OpCompositeExtract %6 %334 2 +%339 = OpFConvert %7 %338 +OpStore %318 %339 +%340 = OpCompositeExtract %7 %334 3 +OpStore %319 %340 +%341 = OpCompositeExtract %8 %334 4 +%342 = OpFConvert %9 %341 +OpStore %320 %342 +%343 = OpCompositeExtract %9 %334 5 +OpStore %321 %343 +%344 = OpCompositeExtract %10 %334 6 +%345 = OpFConvert %11 %344 +OpStore %322 %345 +%346 = OpCompositeExtract %11 %334 7 +OpStore %323 %346 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index f94e1ac3272..71c469806e0 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -198,6 +198,13 @@ impl PhysicalDeviceFeatures { info } + fn supports_storage_input_output_16(&self) -> bool { + self._16bit_storage + .as_ref() + .map(|features| features.storage_input_output16 != 0) + .unwrap_or(false) + } + /// Create a `PhysicalDeviceFeatures` that can be used to create a logical /// device. 
/// @@ -226,7 +233,7 @@ impl PhysicalDeviceFeatures { /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions fn from_extensions_and_requested_features( phd_capabilities: &PhysicalDeviceProperties, - _phd_features: &PhysicalDeviceFeatures, + phd_features: &PhysicalDeviceFeatures, enabled_extensions: &[&'static CStr], requested_features: wgt::Features, downlevel_flags: wgt::DownlevelFlags, @@ -399,7 +406,7 @@ impl PhysicalDeviceFeatures { Some( vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) - .storage_input_output16(true) + .storage_input_output16(phd_features.supports_storage_input_output_16()) .uniform_and_storage_buffer16_bit_access(true), ) } else { @@ -736,12 +743,13 @@ impl PhysicalDeviceFeatures { if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage) { + // Note `storage_input_output16` is not required, we polyfill `f16` I/O using `f32` + // types when this capability is not available features.set( F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 - && bit16.uniform_and_storage_buffer16_bit_access != 0 - && bit16.storage_input_output16 != 0, + && bit16.uniform_and_storage_buffer16_bit_access != 0, ); } @@ -2148,6 +2156,8 @@ impl super::Adapter { spv::ZeroInitializeWorkgroupMemoryMode::Polyfill }, force_loop_bounding: true, + use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) + && self.phd_features.supports_storage_input_output_16(), // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None,