From 1e59af159248ceaad49043d9ac573ab691ca46d3 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 1 May 2025 21:59:32 +0100 Subject: [PATCH 01/20] Fix shaderf16 support on vulkan/nvidia --- wgpu-hal/src/vulkan/adapter.rs | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 419f8eee503..38e3c78991f 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -387,12 +387,22 @@ impl PhysicalDeviceFeatures { None }, shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) { + let supported_f16_features = _phd_features.shader_float16.map(|(_, s)| s) + .unwrap_or_default(); + Some(( vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true), + // Only enable the 16bit storage features that are actually supported by the hardware vk::PhysicalDevice16BitStorageFeatures::default() - .storage_buffer16_bit_access(true) - .storage_input_output16(true) - .uniform_and_storage_buffer16_bit_access(true), + .storage_buffer16_bit_access( + supported_f16_features.storage_buffer16_bit_access != 0 + ) + .uniform_and_storage_buffer16_bit_access( + supported_f16_features.uniform_and_storage_buffer16_bit_access != 0 + ) + .storage_input_output16( + supported_f16_features.storage_input_output16 != 0 + ), )) } else { None @@ -723,7 +733,6 @@ impl PhysicalDeviceFeatures { f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 && bit16.uniform_and_storage_buffer16_bit_access != 0 - && bit16.storage_input_output16 != 0, ); } From 3e0ee6c40fae5d0f76f1d4448cb19c598c6065e7 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 1 May 2025 22:16:54 +0100 Subject: [PATCH 02/20] fmt --- wgpu-hal/src/vulkan/adapter.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 38e3c78991f..c4e7319c268 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ 
b/wgpu-hal/src/vulkan/adapter.rs @@ -387,7 +387,9 @@ impl PhysicalDeviceFeatures { None }, shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) { - let supported_f16_features = _phd_features.shader_float16.map(|(_, s)| s) + let supported_f16_features = _phd_features + .shader_float16 + .map(|(_, s)| s) .unwrap_or_default(); Some(( @@ -395,14 +397,12 @@ impl PhysicalDeviceFeatures { // Only enable the 16bit storage features that are actually supported by the hardware vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access( - supported_f16_features.storage_buffer16_bit_access != 0 + supported_f16_features.storage_buffer16_bit_access != 0, ) .uniform_and_storage_buffer16_bit_access( - supported_f16_features.uniform_and_storage_buffer16_bit_access != 0 + supported_f16_features.uniform_and_storage_buffer16_bit_access != 0, ) - .storage_input_output16( - supported_f16_features.storage_input_output16 != 0 - ), + .storage_input_output16(supported_f16_features.storage_input_output16 != 0), )) } else { None @@ -732,7 +732,7 @@ impl PhysicalDeviceFeatures { F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 - && bit16.uniform_and_storage_buffer16_bit_access != 0 + && bit16.uniform_and_storage_buffer16_bit_access != 0, ); } From 888c46144ad1a33ff8245669eef9abc2df9d03ef Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sat, 5 Jul 2025 08:45:18 -0400 Subject: [PATCH 03/20] F16 polyfill --- Cargo.lock | 24 +- naga/src/back/spv/block.rs | 30 +- naga/src/back/spv/mod.rs | 11 + naga/src/back/spv/polyfill.rs | 108 ++++ naga/src/back/spv/writer.rs | 92 ++- naga/tests/in/wgsl/f16-native.toml | 13 + naga/tests/in/wgsl/f16-native.wgsl | 71 ++ naga/tests/in/wgsl/f16-polyfill.toml | 13 + naga/tests/in/wgsl/f16-polyfill.wgsl | 71 ++ naga/tests/naga/snapshots.rs | 21 +- naga/tests/naga/spirv_capabilities.rs | 135 ++++ naga/tests/out/spv/wgsl-f16-native.spvasm | 655 +++++++++++++++++++ 
naga/tests/out/spv/wgsl-f16-polyfill.spvasm | 675 ++++++++++++++++++++ wgpu-hal/src/vulkan/adapter.rs | 24 +- 14 files changed, 1912 insertions(+), 31 deletions(-) create mode 100644 naga/src/back/spv/polyfill.rs create mode 100644 naga/tests/in/wgsl/f16-native.toml create mode 100644 naga/tests/in/wgsl/f16-native.wgsl create mode 100644 naga/tests/in/wgsl/f16-polyfill.toml create mode 100644 naga/tests/in/wgsl/f16-polyfill.wgsl create mode 100644 naga/tests/out/spv/wgsl-f16-native.spvasm create mode 100644 naga/tests/out/spv/wgsl-f16-polyfill.spvasm diff --git a/Cargo.lock b/Cargo.lock index 5f75584314f..ac5b8a0bb28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -799,15 +799,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.11" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +checksum = "2e09ced7ebbccb63b4c65413d821f2e00ce54c5ca4514ddc6b3c892fdbcbc69d" dependencies = [ "encode_unicode", "libc", "once_cell", "unicode-width", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -2090,14 +2090,14 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.17.11" +version = "0.17.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +checksum = "4adb2ee6ad319a912210a36e56e3623555817bcc877a7e6e8802d1d69c4d8056" dependencies = [ "console", - "number_prefix", "portable-atomic", "unicode-width", + "unit-prefix", "web-time 1.1.0", ] @@ -2720,12 +2720,6 @@ dependencies = [ "syn", ] -[[package]] -name = "number_prefix" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" - [[package]] name = "nv-flip" version = "0.1.2" @@ -4424,6 +4418,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unit-prefix" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817" + [[package]] name = "url" version = "2.5.4" diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 7ec659e1d90..5de6e91c616 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -237,7 +237,18 @@ impl Writer { } }; - body.push(Instruction::store(res_member.id, member_value_id, None)); + if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) { + let converted = self.id_gen.next(); + super::polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( + member_value_id, + f32_ty, + converted, + body, + ); + body.push(Instruction::store(res_member.id, converted, None)); + } else { + body.push(Instruction::store(res_member.id, member_value_id, None)); + } match res_member.built_in { Some(crate::BuiltIn::Position { .. }) @@ -2313,6 +2324,23 @@ impl BlockContext<'_> { match self.write_access_chain(pointer, block, access_type_adjustment)? 
{ ExpressionPointer::Ready { pointer_id } => { let id = self.gen_id(); + + if let Some((f32_ty, _)) = + self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) + { + block + .body + .push(Instruction::load(f32_ty, id, pointer_id, None)); + let converted = self.gen_id(); + super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + id, + result_type_id, + converted, + &mut block.body, + ); + return Ok(converted); + } + let atomic_space = match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) { crate::TypeInner::Pointer { base, space } => { diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 2dcd95957d7..5a37044fe47 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -10,6 +10,7 @@ mod image; mod index; mod instructions; mod layout; +mod polyfill; mod ray; mod recyclable; mod selection; @@ -744,6 +745,7 @@ pub struct Writer { bounds_check_policies: BoundsCheckPolicies, zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode, force_loop_bounding: bool, + use_storage_input_output_16: bool, void_type: Word, //TODO: convert most of these into vectors, addressable by handle indices lookup_type: crate::FastHashMap, @@ -770,6 +772,10 @@ pub struct Writer { ray_get_committed_intersection_function: Option, ray_get_candidate_intersection_function: Option, + + /// F16 I/O polyfill manager for handling f16 input/output variables + /// when StorageInputOutput16 capability is not available. + io_f16_polyfills: polyfill::F16IoPolyfill, } bitflags::bitflags! { @@ -852,6 +858,10 @@ pub struct Options<'a> { /// to think the number of iterations is bounded. pub force_loop_bounding: bool, + /// Whether to use the StorageInputOutput16 capability for f16 shader I/O. + /// When false, f16 I/O is polyfilled using f32 types with conversions. 
+ pub use_storage_input_output_16: bool, + pub debug_info: Option>, } @@ -871,6 +881,7 @@ impl Default for Options<'_> { bounds_check_policies: BoundsCheckPolicies::default(), zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill, force_loop_bounding: true, + use_storage_input_output_16: true, debug_info: None, } } diff --git a/naga/src/back/spv/polyfill.rs b/naga/src/back/spv/polyfill.rs new file mode 100644 index 00000000000..9c50444620d --- /dev/null +++ b/naga/src/back/spv/polyfill.rs @@ -0,0 +1,108 @@ +/*! +This module provides functionality that polyfills f16 input/output variables +when the StorageInputOutput16 capability is not available or disabled. + +It works by: +1. Declaring f16 I/O variables as f32 in SPIR-V +2. Converting between f16 and f32 at runtime using OpFConvert +3. Maintaining mappings to track which variables need conversion +*/ + +use crate::back::spv::{Instruction, LocalType, NumericType, Word}; +use std::vec::Vec; + +/// Manages f16 I/O polyfill state and operations. +#[derive(Default)] +pub(super) struct F16IoPolyfill { + use_native: bool, + variable_map: crate::FastHashMap, +} + +impl F16IoPolyfill { + pub fn new(use_storage_input_output_16: bool) -> Self { + Self { + use_native: use_storage_input_output_16, + variable_map: crate::FastHashMap::default(), + } + } + + pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool { + use crate::{ScalarKind as Sk, TypeInner}; + + !self.use_native + && match *ty_inner { + TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true, + TypeInner::Vector { scalar, ..
} + if scalar.kind == Sk::Float && scalar.width == 2 => + { + true + } + _ => false, + } + } + + pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) { + self.variable_map + .insert(variable_id, (f32_type_id, f16_type_id)); + } + + pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> { + self.variable_map.get(&variable_id).copied() + } + + pub fn emit_f16_to_f32_conversion( + f16_value_id: Word, + f32_type_id: Word, + converted_id: Word, + body: &mut Vec, + ) { + body.push(Instruction::unary( + spirv::Op::FConvert, + f32_type_id, + converted_id, + f16_value_id, + )); + } + + pub fn emit_f32_to_f16_conversion( + f32_value_id: Word, + f16_type_id: Word, + converted_id: Word, + body: &mut Vec, + ) { + body.push(Instruction::unary( + spirv::Op::FConvert, + f16_type_id, + converted_id, + f32_value_id, + )); + } + + pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option { + use crate::{ScalarKind as Sk, TypeInner}; + + match *ty_inner { + TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => { + Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32))) + } + TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => { + Some(LocalType::Numeric(NumericType::Vector { + size, + scalar: crate::Scalar::F32, + })) + } + _ => None, + } + } + + pub fn clear(&mut self) { + self.variable_map.clear(); + } +} + +impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill { + fn recycle(mut self) -> Self { + self.variable_map = self.variable_map.recycle(); + self + } +} diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index b61747c8326..b9c530fbf8f 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -78,6 +78,7 @@ impl Writer { bounds_check_policies: options.bounds_check_policies, zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory, force_loop_bounding: options.force_loop_bounding, + 
use_storage_input_output_16: options.use_storage_input_output_16, void_type, lookup_type: crate::FastHashMap::default(), lookup_function: crate::FastHashMap::default(), @@ -92,6 +93,9 @@ impl Writer { temp_list: Vec::new(), ray_get_committed_intersection_function: None, ray_get_candidate_intersection_function: None, + io_f16_polyfills: super::polyfill::F16IoPolyfill::new( + options.use_storage_input_output_16, + ), }) } @@ -125,6 +129,7 @@ impl Writer { bounds_check_policies: self.bounds_check_policies, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, force_loop_bounding: self.force_loop_bounding, + use_storage_input_output_16: self.use_storage_input_output_16, capabilities_available: take(&mut self.capabilities_available), binding_map: take(&mut self.binding_map), @@ -151,6 +156,7 @@ impl Writer { temp_list: take(&mut self.temp_list).recycle(), ray_get_candidate_intersection_function: None, ray_get_committed_intersection_function: None, + io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(), }; *self = fresh; @@ -726,10 +732,28 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let id = self.id_gen.next(); - prelude - .body - .push(Instruction::load(argument_type_id, id, varying_id, None)); + let mut id = self.id_gen.next(); + + if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) { + prelude + .body + .push(Instruction::load(f32_ty, id, varying_id, None)); + let converted = self.id_gen.next(); + super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + id, + argument_type_id, + converted, + &mut prelude.body, + ); + id = converted; + } else { + prelude.body.push(Instruction::load( + argument_type_id, + id, + varying_id, + None, + )); + } if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { local_invocation_id = Some(id); @@ -755,10 +779,26 @@ impl Writer { )?; iface.varying_ids.push(varying_id); let id = self.id_gen.next(); - prelude - .body - 
.push(Instruction::load(type_id, id, varying_id, None)); - constituent_ids.push(id); + if let Some((f32_ty, _)) = + self.io_f16_polyfills.get_polyfill_info(varying_id) + { + prelude + .body + .push(Instruction::load(f32_ty, id, varying_id, None)); + let converted = self.id_gen.next(); + super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + id, + type_id, + converted, + &mut prelude.body, + ); + constituent_ids.push(converted); + } else { + prelude + .body + .push(Instruction::load(type_id, id, varying_id, None)); + constituent_ids.push(id); + } if binding == &crate::Binding::BuiltIn(crate::BuiltIn::GlobalInvocationId) { local_invocation_id = Some(id); @@ -1220,8 +1260,10 @@ impl Writer { .insert(spirv::Capability::StorageBuffer16BitAccess); self.capabilities_used .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess); - self.capabilities_used - .insert(spirv::Capability::StorageInputOutput16); + if self.use_storage_input_output_16 { + self.capabilities_used + .insert(spirv::Capability::StorageInputOutput16); + } } Instruction::type_float(id, bits) } @@ -1904,8 +1946,27 @@ impl Writer { ty: Handle, binding: &crate::Binding, ) -> Result { + use crate::TypeInner; + let id = self.id_gen.next(); - let pointer_type_id = self.get_handle_pointer_type_id(ty, class); + let ty_inner = &ir_module.types[ty].inner; + let needs_polyfill = self.needs_f16_polyfill(ty_inner); + + let pointer_type_id = if needs_polyfill { + let f32_value_local = super::polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) + .expect("needs_polyfill returned true but create_polyfill_type returned None"); + + let f32_type_id = self.get_localtype_id(f32_value_local); + let ptr_id = self.get_pointer_type_id(f32_type_id, class); + let f16_type_id = self.get_handle_type_id(ty); + self.io_f16_polyfills + .register_variable(id, f32_type_id, f16_type_id); + + ptr_id + } else { + self.get_handle_pointer_type_id(ty, class) + }; + Instruction::variable(pointer_type_id, id, class, None) 
.to_words(&mut self.logical_layout.declarations); @@ -2088,8 +2149,9 @@ impl Writer { // > shader, must be decorated Flat if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment { let is_flat = match ir_module.types[ty].inner { - crate::TypeInner::Scalar(scalar) - | crate::TypeInner::Vector { scalar, .. } => match scalar.kind { + TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar + .kind + { Sk::Uint | Sk::Sint | Sk::Bool => true, Sk::Float => false, Sk::AbstractInt | Sk::AbstractFloat => { @@ -2584,6 +2646,10 @@ impl Writer { self.decorate(id, spirv::Decoration::NonUniform, &[]); Ok(()) } + + pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool { + self.io_f16_polyfills.needs_polyfill(ty_inner) + } } #[test] diff --git a/naga/tests/in/wgsl/f16-native.toml b/naga/tests/in/wgsl/f16-native.toml new file mode 100644 index 00000000000..529d34f80da --- /dev/null +++ b/naga/tests/in/wgsl/f16-native.toml @@ -0,0 +1,13 @@ +targets = "SPIRV" +god_mode = true + +[spv] +debug = true +version = [1, 1] +use_storage_input_output_16 = true +capabilities = ["Float16"] + +[bounds_check_policies] +index = "ReadZeroSkipWrite" +buffer = "ReadZeroSkipWrite" +image = "ReadZeroSkipWrite" diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl new file mode 100644 index 00000000000..2dea0baaa29 --- /dev/null +++ b/naga/tests/in/wgsl/f16-native.wgsl @@ -0,0 +1,71 @@ +enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + 
vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} diff --git a/naga/tests/in/wgsl/f16-polyfill.toml b/naga/tests/in/wgsl/f16-polyfill.toml new file mode 100644 index 00000000000..96160063e05 --- /dev/null +++ b/naga/tests/in/wgsl/f16-polyfill.toml @@ -0,0 +1,13 @@ +targets = "SPIRV" +god_mode = true + +[spv] +debug = true +version = [1, 1] +use_storage_input_output_16 = false +capabilities = ["Float16"] + +[bounds_check_policies] +index = 
"ReadZeroSkipWrite" +buffer = "ReadZeroSkipWrite" +image = "ReadZeroSkipWrite" diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl new file mode 100644 index 00000000000..2dea0baaa29 --- /dev/null +++ b/naga/tests/in/wgsl/f16-polyfill.wgsl @@ -0,0 +1,71 @@ +enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + 
output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} diff --git a/naga/tests/naga/snapshots.rs b/naga/tests/naga/snapshots.rs index 32e2f5e0285..b00ef579203 100644 --- a/naga/tests/naga/snapshots.rs +++ b/naga/tests/naga/snapshots.rs @@ -91,7 +91,7 @@ struct SpirvInParameters { adjust_coordinate_space: bool, } -#[derive(Default, serde::Deserialize)] +#[derive(serde::Deserialize)] #[serde(default)] struct SpirvOutParameters { version: SpvOutVersion, @@ -101,11 +101,29 @@ struct SpirvOutParameters { force_point_size: bool, clamp_frag_depth: bool, separate_entry_points: bool, + use_storage_input_output_16: bool, #[cfg(all(feature = "deserialize", spv_out))] #[serde(deserialize_with = "deserialize_binding_map")] binding_map: naga::back::spv::BindingMap, } +impl Default for SpirvOutParameters { + fn default() -> Self { + Self { + version: SpvOutVersion::default(), + capabilities: naga::FastHashSet::default(), + debug: false, + adjust_coordinate_space: false, + force_point_size: false, + clamp_frag_depth: false, + separate_entry_points: false, + use_storage_input_output_16: true, + #[cfg(all(feature = "deserialize", spv_out))] + binding_map: naga::back::spv::BindingMap::default(), + } + } +} + #[derive(Default, serde::Deserialize)] #[serde(default)] struct WgslOutParameters { @@ -617,6 +635,7 @@ fn write_output_spv( binding_map: params.binding_map.clone(), zero_initialize_workgroup_memory: spv::ZeroInitializeWorkgroupMemoryMode::Polyfill, force_loop_bounding: true, + use_storage_input_output_16: params.use_storage_input_output_16, debug_info, }; diff --git a/naga/tests/naga/spirv_capabilities.rs 
b/naga/tests/naga/spirv_capabilities.rs index 2d46e37f72d..aa99298273d 100644 --- a/naga/tests/naga/spirv_capabilities.rs +++ b/naga/tests/naga/spirv_capabilities.rs @@ -6,6 +6,9 @@ Test SPIR-V backend capability checks. use spirv::Capability as Ca; +#[cfg(spv_out)] +use rspirv::binary::Disassemble; + fn capabilities_used(source: &str) -> naga::FastIndexSet { use naga::back::spv; use naga::valid; @@ -213,3 +216,135 @@ fn int64() { fn float16() { require(&[Ca::Float16], "enable f16; fn f(x: f16) { }"); } + +#[test] +fn f16_io_capabilities() { + let source = r#" + enable f16; + + struct VertexOutput { + @location(0) color: vec3, + } + + @fragment + fn main(input: VertexOutput) -> @location(0) vec4 { + return vec4(input.color, f16(1.0)); + } + "#; + + use naga::back::spv; + use naga::valid; + + let module = naga::front::wgsl::parse_str(source).unwrap(); + let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()) + .validate(&module) + .unwrap(); + + // Test native path: use_storage_input_output_16 = true + let options_native = spv::Options { + use_storage_input_output_16: true, + ..Default::default() + }; + + let mut words_native = vec![]; + let mut writer_native = spv::Writer::new(&options_native).unwrap(); + writer_native + .write(&module, &info, None, &None, &mut words_native) + .unwrap(); + let caps_native = writer_native.get_capabilities_used(); + + // Should include StorageInputOutput16 for native f16 I/O + assert!(caps_native.contains(&Ca::StorageInputOutput16)); + + // Test polyfill path: use_storage_input_output_16 = false + let options_polyfill = spv::Options { + use_storage_input_output_16: false, + ..Default::default() + }; + + let mut words_polyfill = vec![]; + let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap(); + writer_polyfill + .write(&module, &info, None, &None, &mut words_polyfill) + .unwrap(); + let caps_polyfill = writer_polyfill.get_capabilities_used(); + + // Should not include 
StorageInputOutput16 when polyfilled + assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); + + // But should still include the basic f16 capabilities + assert!(caps_polyfill.contains(&Ca::Float16)); +} + +#[cfg(spv_out)] +#[test] +fn f16_io_polyfill_codegen() { + let source = r#" + enable f16; + + struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + } + + @fragment + fn main(input: F16IO) -> F16IO { + var output = input; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.vec2_f16.x = input.vec2_f16.y; + return output; + } + "#; + + use naga::{back::spv, valid}; + + let module = naga::front::wgsl::parse_str(source).unwrap(); + let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()) + .validate(&module) + .unwrap(); + + // Test Native Path + let options_native = spv::Options { + use_storage_input_output_16: true, + ..Default::default() + }; + let mut words_native = vec![]; + let mut writer_native = spv::Writer::new(&options_native).unwrap(); + writer_native + .write(&module, &info, None, &None, &mut words_native) + .unwrap(); + let caps_native = writer_native.get_capabilities_used(); + let dis_native = rspirv::dr::load_words(words_native).unwrap().disassemble(); + + // Native path must request the capability and must NOT have conversions. 
+ assert!(caps_native.contains(&Ca::StorageInputOutput16)); + assert!(!dis_native.contains("OpFConvert")); + + // Test Polyfill Path + let options_polyfill = spv::Options { + use_storage_input_output_16: false, + ..Default::default() + }; + let mut words_polyfill = vec![]; + let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap(); + writer_polyfill + .write(&module, &info, None, &None, &mut words_polyfill) + .unwrap(); + let caps_polyfill = writer_polyfill.get_capabilities_used(); + let dis_polyfill = rspirv::dr::load_words(words_polyfill) + .unwrap() + .disassemble(); + + // Polyfill path must NOT request the capability and must have conversions. + assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); + assert!(dis_polyfill.contains("OpFConvert")); + + // Should have 2 input conversions, and 2 output conversions + let fconvert_count = dis_polyfill.matches("OpFConvert").count(); + assert_eq!( + fconvert_count, 4, + "Expected 4 OpFConvert instructions for polyfilled I/O" + ); +} diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm new file mode 100644 index 00000000000..096b92bf03c --- /dev/null +++ b/naga/tests/out/spv/wgsl-f16-native.spvasm @@ -0,0 +1,655 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 273 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability StorageInputOutput16 +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40 %42 %44 %46 %48 %50 %52 +OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135 +OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198 +OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254
%256 %258 %260 %262 %264 +OpExecutionMode %54 OriginUpperLeft +OpExecutionMode %136 OriginUpperLeft +OpExecutionMode %199 OriginUpperLeft +OpExecutionMode %265 OriginUpperLeft +%3 = OpString "f16-native.wgsl" +OpSource Unknown 0 %3 "enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = 
input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} +" +OpMemberName %12 0 "scalar_f16" +OpMemberName %12 1 "scalar_f32" +OpMemberName %12 2 "vec2_f16" +OpMemberName %12 3 "vec2_f32" +OpMemberName %12 4 "vec3_f16" +OpMemberName %12 5 "vec3_f32" +OpMemberName %12 6 "vec4_f16" +OpMemberName %12 7 "vec4_f32" +OpName %12 "F16IO" +OpName %14 "scalar_f16" +OpName %17 "scalar_f32" +OpName %20 "vec2_f16" +OpName %23 "vec2_f32" +OpName %26 "vec3_f16" +OpName %29 "vec3_f32" +OpName %32 "vec4_f16" +OpName %35 "vec4_f32" +OpName %38 "scalar_f16" +OpName %40 "scalar_f32" +OpName %42 "vec2_f16" +OpName %44 "vec2_f32" +OpName %46 "vec3_f16" +OpName %48 "vec3_f32" +OpName %50 "vec4_f16" +OpName %52 "vec4_f32" +OpName %54 "test_direct" +OpName %64 "output" +OpName %112 "scalar_f16" +OpName %114 "scalar_f32" +OpName %116 "vec2_f16" +OpName %118 "vec2_f32" +OpName %120 "vec3_f16" +OpName %122 "vec3_f32" +OpName %124 "vec4_f16" +OpName %126 "vec4_f32" +OpName %128 "scalar_f16" +OpName %129 "scalar_f32" +OpName %130 "vec2_f16" +OpName %131 "vec2_f32" +OpName %132 "vec3_f16" +OpName %133 "vec3_f32" +OpName %134 "vec4_f16" +OpName %135 "vec4_f32" +OpName %136 "test_struct" +OpName %137 "output" +OpName %175 "scalar_f16" +OpName %177 "scalar_f32" +OpName %179 "vec2_f16" +OpName %181 "vec2_f32" +OpName %183 "vec3_f16" +OpName %185 "vec3_f32" +OpName %187 "vec4_f16" +OpName %189 "vec4_f32" +OpName %191 "scalar_f16" +OpName %192 "scalar_f32" +OpName %193 "vec2_f16" +OpName %194 "vec2_f32" +OpName %195 "vec3_f16" +OpName %196 "vec3_f32" +OpName %197 "vec4_f16" +OpName %198 "vec4_f32" +OpName %199 "test_copy_input" +OpName %200 "input" +OpName %202 "output" +OpName %248 "scalar_f16" 
+OpName %250 "scalar_f32" +OpName %252 "vec2_f16" +OpName %254 "vec2_f32" +OpName %256 "vec3_f16" +OpName %258 "vec3_f32" +OpName %260 "vec4_f16" +OpName %262 "vec4_f32" +OpName %265 "test_return_partial" +OpName %267 "input" +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 16 +OpMemberDecorate %12 4 Offset 24 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %12 6 Offset 48 +OpMemberDecorate %12 7 Offset 64 +OpDecorate %14 Location 0 +OpDecorate %17 Location 1 +OpDecorate %20 Location 2 +OpDecorate %23 Location 3 +OpDecorate %26 Location 4 +OpDecorate %29 Location 5 +OpDecorate %32 Location 6 +OpDecorate %35 Location 7 +OpDecorate %38 Location 0 +OpDecorate %40 Location 1 +OpDecorate %42 Location 2 +OpDecorate %44 Location 3 +OpDecorate %46 Location 4 +OpDecorate %48 Location 5 +OpDecorate %50 Location 6 +OpDecorate %52 Location 7 +OpDecorate %112 Location 0 +OpDecorate %114 Location 1 +OpDecorate %116 Location 2 +OpDecorate %118 Location 3 +OpDecorate %120 Location 4 +OpDecorate %122 Location 5 +OpDecorate %124 Location 6 +OpDecorate %126 Location 7 +OpDecorate %128 Location 0 +OpDecorate %129 Location 1 +OpDecorate %130 Location 2 +OpDecorate %131 Location 3 +OpDecorate %132 Location 4 +OpDecorate %133 Location 5 +OpDecorate %134 Location 6 +OpDecorate %135 Location 7 +OpDecorate %175 Location 0 +OpDecorate %177 Location 1 +OpDecorate %179 Location 2 +OpDecorate %181 Location 3 +OpDecorate %183 Location 4 +OpDecorate %185 Location 5 +OpDecorate %187 Location 6 +OpDecorate %189 Location 7 +OpDecorate %191 Location 0 +OpDecorate %192 Location 1 +OpDecorate %193 Location 2 +OpDecorate %194 Location 3 +OpDecorate %195 Location 4 +OpDecorate %196 Location 5 +OpDecorate %197 Location 6 +OpDecorate %198 Location 7 +OpDecorate %248 Location 0 +OpDecorate %250 Location 1 +OpDecorate %252 Location 2 +OpDecorate %254 Location 3 +OpDecorate %256 Location 4 +OpDecorate %258 Location 5 
+OpDecorate %260 Location 6 +OpDecorate %262 Location 7 +OpDecorate %264 Location 0 +%2 = OpTypeVoid +%4 = OpTypeFloat 16 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %4 2 +%7 = OpTypeVector %5 2 +%8 = OpTypeVector %4 3 +%9 = OpTypeVector %5 3 +%10 = OpTypeVector %4 4 +%11 = OpTypeVector %5 4 +%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11 +%15 = OpTypePointer Input %4 +%14 = OpVariable %15 Input +%18 = OpTypePointer Input %5 +%17 = OpVariable %18 Input +%21 = OpTypePointer Input %6 +%20 = OpVariable %21 Input +%24 = OpTypePointer Input %7 +%23 = OpVariable %24 Input +%27 = OpTypePointer Input %8 +%26 = OpVariable %27 Input +%30 = OpTypePointer Input %9 +%29 = OpVariable %30 Input +%33 = OpTypePointer Input %10 +%32 = OpVariable %33 Input +%36 = OpTypePointer Input %11 +%35 = OpVariable %36 Input +%39 = OpTypePointer Output %4 +%38 = OpVariable %39 Output +%41 = OpTypePointer Output %5 +%40 = OpVariable %41 Output +%43 = OpTypePointer Output %6 +%42 = OpVariable %43 Output +%45 = OpTypePointer Output %7 +%44 = OpVariable %45 Output +%47 = OpTypePointer Output %8 +%46 = OpVariable %47 Output +%49 = OpTypePointer Output %9 +%48 = OpVariable %49 Output +%51 = OpTypePointer Output %10 +%50 = OpVariable %51 Output +%53 = OpTypePointer Output %11 +%52 = OpVariable %53 Output +%55 = OpTypeFunction %2 +%56 = OpConstant %4 2.1524e-41 +%57 = OpConstant %5 1.0 +%58 = OpConstantComposite %6 %56 %56 +%59 = OpConstantComposite %7 %57 %57 +%60 = OpConstantComposite %8 %56 %56 %56 +%61 = OpConstantComposite %9 %57 %57 %57 +%62 = OpConstantComposite %10 %56 %56 %56 %56 +%63 = OpConstantComposite %11 %57 %57 %57 %57 +%65 = OpTypePointer Function %12 +%66 = OpConstantNull %12 +%68 = OpTypePointer Function %4 +%71 = OpTypeInt 32 0 +%70 = OpConstant %71 0 +%73 = OpTypePointer Function %5 +%75 = OpConstant %71 1 +%77 = OpTypePointer Function %6 +%79 = OpConstant %71 2 +%81 = OpTypePointer Function %7 +%83 = OpConstant %71 3 +%85 = OpTypePointer Function %8 +%87 = OpConstant %71 4 +%89 = 
OpTypePointer Function %9 +%91 = OpConstant %71 5 +%93 = OpTypePointer Function %10 +%95 = OpConstant %71 6 +%97 = OpTypePointer Function %11 +%99 = OpConstant %71 7 +%112 = OpVariable %15 Input +%114 = OpVariable %18 Input +%116 = OpVariable %21 Input +%118 = OpVariable %24 Input +%120 = OpVariable %27 Input +%122 = OpVariable %30 Input +%124 = OpVariable %33 Input +%126 = OpVariable %36 Input +%128 = OpVariable %39 Output +%129 = OpVariable %41 Output +%130 = OpVariable %43 Output +%131 = OpVariable %45 Output +%132 = OpVariable %47 Output +%133 = OpVariable %49 Output +%134 = OpVariable %51 Output +%135 = OpVariable %53 Output +%138 = OpConstantNull %12 +%175 = OpVariable %15 Input +%177 = OpVariable %18 Input +%179 = OpVariable %21 Input +%181 = OpVariable %24 Input +%183 = OpVariable %27 Input +%185 = OpVariable %30 Input +%187 = OpVariable %33 Input +%189 = OpVariable %36 Input +%191 = OpVariable %39 Output +%192 = OpVariable %41 Output +%193 = OpVariable %43 Output +%194 = OpVariable %45 Output +%195 = OpVariable %47 Output +%196 = OpVariable %49 Output +%197 = OpVariable %51 Output +%198 = OpVariable %53 Output +%201 = OpConstantNull %12 +%203 = OpConstantNull %12 +%248 = OpVariable %15 Input +%250 = OpVariable %18 Input +%252 = OpVariable %21 Input +%254 = OpVariable %24 Input +%256 = OpVariable %27 Input +%258 = OpVariable %30 Input +%260 = OpVariable %33 Input +%262 = OpVariable %36 Input +%264 = OpVariable %39 Output +%266 = OpConstant %4 0.0 +%268 = OpConstantNull %12 +%54 = OpFunction %2 None %55 +%13 = OpLabel +%64 = OpVariable %65 Function %66 +%16 = OpLoad %4 %14 +%19 = OpLoad %5 %17 +%22 = OpLoad %6 %20 +%25 = OpLoad %7 %23 +%28 = OpLoad %8 %26 +%31 = OpLoad %9 %29 +%34 = OpLoad %10 %32 +%37 = OpLoad %11 %35 +OpBranch %67 +%67 = OpLabel +OpLine %3 15 5 +OpLine %3 15 25 +%69 = OpFAdd %4 %16 %56 +OpLine %3 15 5 +%72 = OpAccessChain %68 %64 %70 +OpStore %72 %69 +OpLine %3 16 5 +OpLine %3 16 25 +%74 = OpFAdd %5 %19 %57 +OpLine %3 16 5 +%76 = 
OpAccessChain %73 %64 %75 +OpStore %76 %74 +OpLine %3 17 5 +OpLine %3 17 23 +%78 = OpFAdd %6 %22 %58 +OpLine %3 17 5 +%80 = OpAccessChain %77 %64 %79 +OpStore %80 %78 +OpLine %3 18 5 +OpLine %3 18 34 +OpLine %3 18 23 +%82 = OpFAdd %7 %25 %59 +OpLine %3 18 5 +%84 = OpAccessChain %81 %64 %83 +OpStore %84 %82 +OpLine %3 19 5 +OpLine %3 19 23 +%86 = OpFAdd %8 %28 %60 +OpLine %3 19 5 +%88 = OpAccessChain %85 %64 %87 +OpStore %88 %86 +OpLine %3 20 5 +OpLine %3 20 34 +OpLine %3 20 23 +%90 = OpFAdd %9 %31 %61 +OpLine %3 20 5 +%92 = OpAccessChain %89 %64 %91 +OpStore %92 %90 +OpLine %3 21 5 +OpLine %3 21 23 +%94 = OpFAdd %10 %34 %62 +OpLine %3 21 5 +%96 = OpAccessChain %93 %64 %95 +OpStore %96 %94 +OpLine %3 22 5 +OpLine %3 22 34 +OpLine %3 22 23 +%98 = OpFAdd %11 %37 %63 +OpLine %3 22 5 +%100 = OpAccessChain %97 %64 %99 +OpStore %100 %98 +OpLine %3 1 1 +%101 = OpLoad %12 %64 +%102 = OpCompositeExtract %4 %101 0 +OpStore %38 %102 +%103 = OpCompositeExtract %5 %101 1 +OpStore %40 %103 +%104 = OpCompositeExtract %6 %101 2 +OpStore %42 %104 +%105 = OpCompositeExtract %7 %101 3 +OpStore %44 %105 +%106 = OpCompositeExtract %8 %101 4 +OpStore %46 %106 +%107 = OpCompositeExtract %9 %101 5 +OpStore %48 %107 +%108 = OpCompositeExtract %10 %101 6 +OpStore %50 %108 +%109 = OpCompositeExtract %11 %101 7 +OpStore %52 %109 +OpReturn +OpFunctionEnd +%136 = OpFunction %2 None %55 +%110 = OpLabel +%137 = OpVariable %65 Function %138 +%113 = OpLoad %4 %112 +%115 = OpLoad %5 %114 +%117 = OpLoad %6 %116 +%119 = OpLoad %7 %118 +%121 = OpLoad %8 %120 +%123 = OpLoad %9 %122 +%125 = OpLoad %10 %124 +%127 = OpLoad %11 %126 +%111 = OpCompositeConstruct %12 %113 %115 %117 %119 %121 %123 %125 %127 +OpBranch %139 +%139 = OpLabel +OpLine %3 40 5 +%140 = OpCompositeExtract %4 %111 0 +OpLine %3 40 25 +%141 = OpFAdd %4 %140 %56 +OpLine %3 40 5 +%142 = OpAccessChain %68 %137 %70 +OpStore %142 %141 +OpLine %3 41 5 +%143 = OpCompositeExtract %5 %111 1 +OpLine %3 41 25 +%144 = OpFAdd %5 %143 %57 +OpLine %3 41 
5 +%145 = OpAccessChain %73 %137 %75 +OpStore %145 %144 +OpLine %3 42 5 +%146 = OpCompositeExtract %6 %111 2 +OpLine %3 42 23 +%147 = OpFAdd %6 %146 %58 +OpLine %3 42 5 +%148 = OpAccessChain %77 %137 %79 +OpStore %148 %147 +OpLine %3 43 5 +%149 = OpCompositeExtract %7 %111 3 +OpLine %3 43 40 +OpLine %3 43 23 +%150 = OpFAdd %7 %149 %59 +OpLine %3 43 5 +%151 = OpAccessChain %81 %137 %83 +OpStore %151 %150 +OpLine %3 44 5 +%152 = OpCompositeExtract %8 %111 4 +OpLine %3 44 23 +%153 = OpFAdd %8 %152 %60 +OpLine %3 44 5 +%154 = OpAccessChain %85 %137 %87 +OpStore %154 %153 +OpLine %3 45 5 +%155 = OpCompositeExtract %9 %111 5 +OpLine %3 45 40 +OpLine %3 45 23 +%156 = OpFAdd %9 %155 %61 +OpLine %3 45 5 +%157 = OpAccessChain %89 %137 %91 +OpStore %157 %156 +OpLine %3 46 5 +%158 = OpCompositeExtract %10 %111 6 +OpLine %3 46 23 +%159 = OpFAdd %10 %158 %62 +OpLine %3 46 5 +%160 = OpAccessChain %93 %137 %95 +OpStore %160 %159 +OpLine %3 47 5 +%161 = OpCompositeExtract %11 %111 7 +OpLine %3 47 40 +OpLine %3 47 23 +%162 = OpFAdd %11 %161 %63 +OpLine %3 47 5 +%163 = OpAccessChain %97 %137 %99 +OpStore %163 %162 +OpLine %3 1 1 +%164 = OpLoad %12 %137 +%165 = OpCompositeExtract %4 %164 0 +OpStore %128 %165 +%166 = OpCompositeExtract %5 %164 1 +OpStore %129 %166 +%167 = OpCompositeExtract %6 %164 2 +OpStore %130 %167 +%168 = OpCompositeExtract %7 %164 3 +OpStore %131 %168 +%169 = OpCompositeExtract %8 %164 4 +OpStore %132 %169 +%170 = OpCompositeExtract %9 %164 5 +OpStore %133 %170 +%171 = OpCompositeExtract %10 %164 6 +OpStore %134 %171 +%172 = OpCompositeExtract %11 %164 7 +OpStore %135 %172 +OpReturn +OpFunctionEnd +%199 = OpFunction %2 None %55 +%173 = OpLabel +%200 = OpVariable %65 Function %201 +%202 = OpVariable %65 Function %203 +%176 = OpLoad %4 %175 +%178 = OpLoad %5 %177 +%180 = OpLoad %6 %179 +%182 = OpLoad %7 %181 +%184 = OpLoad %8 %183 +%186 = OpLoad %9 %185 +%188 = OpLoad %10 %187 +%190 = OpLoad %11 %189 +%174 = OpCompositeConstruct %12 %176 %178 %180 %182 %184 %186 
%188 %190 +OpBranch %204 +%204 = OpLabel +OpLine %3 53 5 +OpStore %200 %174 +OpLine %3 55 5 +%205 = OpAccessChain %68 %200 %70 +%206 = OpLoad %4 %205 +OpLine %3 55 25 +%207 = OpFAdd %4 %206 %56 +OpLine %3 55 5 +%208 = OpAccessChain %68 %202 %70 +OpStore %208 %207 +OpLine %3 56 5 +%209 = OpAccessChain %73 %200 %75 +%210 = OpLoad %5 %209 +OpLine %3 56 25 +%211 = OpFAdd %5 %210 %57 +OpLine %3 56 5 +%212 = OpAccessChain %73 %202 %75 +OpStore %212 %211 +OpLine %3 57 5 +%213 = OpAccessChain %77 %200 %79 +%214 = OpLoad %6 %213 +OpLine %3 57 23 +%215 = OpFAdd %6 %214 %58 +OpLine %3 57 5 +%216 = OpAccessChain %77 %202 %79 +OpStore %216 %215 +OpLine %3 58 5 +%217 = OpAccessChain %81 %200 %83 +%218 = OpLoad %7 %217 +OpLine %3 58 40 +OpLine %3 58 23 +%219 = OpFAdd %7 %218 %59 +OpLine %3 58 5 +%220 = OpAccessChain %81 %202 %83 +OpStore %220 %219 +OpLine %3 59 5 +%221 = OpAccessChain %85 %200 %87 +%222 = OpLoad %8 %221 +OpLine %3 59 23 +%223 = OpFAdd %8 %222 %60 +OpLine %3 59 5 +%224 = OpAccessChain %85 %202 %87 +OpStore %224 %223 +OpLine %3 60 5 +%225 = OpAccessChain %89 %200 %91 +%226 = OpLoad %9 %225 +OpLine %3 60 40 +OpLine %3 60 23 +%227 = OpFAdd %9 %226 %61 +OpLine %3 60 5 +%228 = OpAccessChain %89 %202 %91 +OpStore %228 %227 +OpLine %3 61 5 +%229 = OpAccessChain %93 %200 %95 +%230 = OpLoad %10 %229 +OpLine %3 61 23 +%231 = OpFAdd %10 %230 %62 +OpLine %3 61 5 +%232 = OpAccessChain %93 %202 %95 +OpStore %232 %231 +OpLine %3 62 5 +%233 = OpAccessChain %97 %200 %99 +%234 = OpLoad %11 %233 +OpLine %3 62 40 +OpLine %3 62 23 +%235 = OpFAdd %11 %234 %63 +OpLine %3 62 5 +%236 = OpAccessChain %97 %202 %99 +OpStore %236 %235 +OpLine %3 1 1 +%237 = OpLoad %12 %202 +%238 = OpCompositeExtract %4 %237 0 +OpStore %191 %238 +%239 = OpCompositeExtract %5 %237 1 +OpStore %192 %239 +%240 = OpCompositeExtract %6 %237 2 +OpStore %193 %240 +%241 = OpCompositeExtract %7 %237 3 +OpStore %194 %241 +%242 = OpCompositeExtract %8 %237 4 +OpStore %195 %242 +%243 = OpCompositeExtract %9 %237 5 +OpStore 
%196 %243 +%244 = OpCompositeExtract %10 %237 6 +OpStore %197 %244 +%245 = OpCompositeExtract %11 %237 7 +OpStore %198 %245 +OpReturn +OpFunctionEnd +%265 = OpFunction %2 None %55 +%246 = OpLabel +%267 = OpVariable %65 Function %268 +%249 = OpLoad %4 %248 +%251 = OpLoad %5 %250 +%253 = OpLoad %6 %252 +%255 = OpLoad %7 %254 +%257 = OpLoad %8 %256 +%259 = OpLoad %9 %258 +%261 = OpLoad %10 %260 +%263 = OpLoad %11 %262 +%247 = OpCompositeConstruct %12 %249 %251 %253 %255 %257 %259 %261 %263 +OpBranch %269 +%269 = OpLabel +OpLine %3 68 5 +OpStore %267 %247 +OpLine %3 69 5 +OpLine %3 69 5 +%270 = OpAccessChain %68 %267 %70 +OpStore %270 %266 +OpLine %3 70 12 +%271 = OpAccessChain %68 %267 %70 +%272 = OpLoad %4 %271 +OpStore %264 %272 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm new file mode 100644 index 00000000000..f3613b5f64c --- /dev/null +++ b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm @@ -0,0 +1,675 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 294 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40 %41 %43 %44 %46 %47 %49 +OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139 +OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210 +OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284 +OpExecutionMode %50 OriginUpperLeft +OpExecutionMode %140 OriginUpperLeft +OpExecutionMode %211 OriginUpperLeft +OpExecutionMode %285 OriginUpperLeft +%3 = OpString "f16-polyfill.wgsl" +OpSource Unknown 0 %3 "enable f16; + 
+@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: 
F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} +" +OpMemberName %12 0 "scalar_f16" +OpMemberName %12 1 "scalar_f32" +OpMemberName %12 2 "vec2_f16" +OpMemberName %12 3 "vec2_f32" +OpMemberName %12 4 "vec3_f16" +OpMemberName %12 5 "vec3_f32" +OpMemberName %12 6 "vec4_f16" +OpMemberName %12 7 "vec4_f32" +OpName %12 "F16IO" +OpName %14 "scalar_f16" +OpName %18 "scalar_f32" +OpName %20 "vec2_f16" +OpName %24 "vec2_f32" +OpName %26 "vec3_f16" +OpName %30 "vec3_f32" +OpName %32 "vec4_f16" +OpName %36 "vec4_f32" +OpName %38 "scalar_f16" +OpName %40 "scalar_f32" +OpName %41 "vec2_f16" +OpName %43 "vec2_f32" +OpName %44 "vec3_f16" +OpName %46 "vec3_f32" +OpName %47 "vec4_f16" +OpName %49 "vec4_f32" +OpName %50 "test_direct" +OpName %60 "output" +OpName %112 "scalar_f16" +OpName %115 "scalar_f32" +OpName %117 "vec2_f16" +OpName %120 "vec2_f32" +OpName %122 "vec3_f16" +OpName %125 "vec3_f32" +OpName %127 "vec4_f16" +OpName %130 "vec4_f32" +OpName %132 "scalar_f16" +OpName %133 "scalar_f32" +OpName %134 "vec2_f16" +OpName %135 "vec2_f32" +OpName %136 "vec3_f16" +OpName %137 "vec3_f32" +OpName %138 "vec4_f16" +OpName %139 "vec4_f32" +OpName %140 "test_struct" +OpName %141 "output" +OpName %183 "scalar_f16" +OpName %186 "scalar_f32" +OpName %188 "vec2_f16" +OpName %191 "vec2_f32" +OpName %193 "vec3_f16" +OpName %196 "vec3_f32" +OpName %198 "vec4_f16" +OpName %201 "vec4_f32" +OpName %203 "scalar_f16" +OpName %204 "scalar_f32" +OpName %205 "vec2_f16" +OpName %206 "vec2_f32" +OpName %207 "vec3_f16" +OpName %208 "vec3_f32" +OpName %209 "vec4_f16" +OpName %210 "vec4_f32" +OpName %211 "test_copy_input" +OpName %212 "input" +OpName %214 "output" +OpName %264 "scalar_f16" +OpName %267 "scalar_f32" +OpName %269 "vec2_f16" +OpName %272 "vec2_f32" +OpName %274 "vec3_f16" +OpName %277 "vec3_f32" +OpName %279 "vec4_f16" +OpName %282 "vec4_f32" +OpName %285 "test_return_partial" +OpName %287 "input" +OpMemberDecorate %12 0 
Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 16 +OpMemberDecorate %12 4 Offset 24 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %12 6 Offset 48 +OpMemberDecorate %12 7 Offset 64 +OpDecorate %14 Location 0 +OpDecorate %18 Location 1 +OpDecorate %20 Location 2 +OpDecorate %24 Location 3 +OpDecorate %26 Location 4 +OpDecorate %30 Location 5 +OpDecorate %32 Location 6 +OpDecorate %36 Location 7 +OpDecorate %38 Location 0 +OpDecorate %40 Location 1 +OpDecorate %41 Location 2 +OpDecorate %43 Location 3 +OpDecorate %44 Location 4 +OpDecorate %46 Location 5 +OpDecorate %47 Location 6 +OpDecorate %49 Location 7 +OpDecorate %112 Location 0 +OpDecorate %115 Location 1 +OpDecorate %117 Location 2 +OpDecorate %120 Location 3 +OpDecorate %122 Location 4 +OpDecorate %125 Location 5 +OpDecorate %127 Location 6 +OpDecorate %130 Location 7 +OpDecorate %132 Location 0 +OpDecorate %133 Location 1 +OpDecorate %134 Location 2 +OpDecorate %135 Location 3 +OpDecorate %136 Location 4 +OpDecorate %137 Location 5 +OpDecorate %138 Location 6 +OpDecorate %139 Location 7 +OpDecorate %183 Location 0 +OpDecorate %186 Location 1 +OpDecorate %188 Location 2 +OpDecorate %191 Location 3 +OpDecorate %193 Location 4 +OpDecorate %196 Location 5 +OpDecorate %198 Location 6 +OpDecorate %201 Location 7 +OpDecorate %203 Location 0 +OpDecorate %204 Location 1 +OpDecorate %205 Location 2 +OpDecorate %206 Location 3 +OpDecorate %207 Location 4 +OpDecorate %208 Location 5 +OpDecorate %209 Location 6 +OpDecorate %210 Location 7 +OpDecorate %264 Location 0 +OpDecorate %267 Location 1 +OpDecorate %269 Location 2 +OpDecorate %272 Location 3 +OpDecorate %274 Location 4 +OpDecorate %277 Location 5 +OpDecorate %279 Location 6 +OpDecorate %282 Location 7 +OpDecorate %284 Location 0 +%2 = OpTypeVoid +%4 = OpTypeFloat 16 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %4 2 +%7 = OpTypeVector %5 2 +%8 = OpTypeVector %4 3 +%9 = OpTypeVector %5 3 +%10 = OpTypeVector 
%4 4 +%11 = OpTypeVector %5 4 +%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11 +%15 = OpTypePointer Input %5 +%14 = OpVariable %15 Input +%18 = OpVariable %15 Input +%21 = OpTypePointer Input %7 +%20 = OpVariable %21 Input +%24 = OpVariable %21 Input +%27 = OpTypePointer Input %9 +%26 = OpVariable %27 Input +%30 = OpVariable %27 Input +%33 = OpTypePointer Input %11 +%32 = OpVariable %33 Input +%36 = OpVariable %33 Input +%39 = OpTypePointer Output %5 +%38 = OpVariable %39 Output +%40 = OpVariable %39 Output +%42 = OpTypePointer Output %7 +%41 = OpVariable %42 Output +%43 = OpVariable %42 Output +%45 = OpTypePointer Output %9 +%44 = OpVariable %45 Output +%46 = OpVariable %45 Output +%48 = OpTypePointer Output %11 +%47 = OpVariable %48 Output +%49 = OpVariable %48 Output +%51 = OpTypeFunction %2 +%52 = OpConstant %4 2.1524e-41 +%53 = OpConstant %5 1.0 +%54 = OpConstantComposite %6 %52 %52 +%55 = OpConstantComposite %7 %53 %53 +%56 = OpConstantComposite %8 %52 %52 %52 +%57 = OpConstantComposite %9 %53 %53 %53 +%58 = OpConstantComposite %10 %52 %52 %52 %52 +%59 = OpConstantComposite %11 %53 %53 %53 %53 +%61 = OpTypePointer Function %12 +%62 = OpConstantNull %12 +%64 = OpTypePointer Function %4 +%67 = OpTypeInt 32 0 +%66 = OpConstant %67 0 +%69 = OpTypePointer Function %5 +%71 = OpConstant %67 1 +%73 = OpTypePointer Function %6 +%75 = OpConstant %67 2 +%77 = OpTypePointer Function %7 +%79 = OpConstant %67 3 +%81 = OpTypePointer Function %8 +%83 = OpConstant %67 4 +%85 = OpTypePointer Function %9 +%87 = OpConstant %67 5 +%89 = OpTypePointer Function %10 +%91 = OpConstant %67 6 +%93 = OpTypePointer Function %11 +%95 = OpConstant %67 7 +%112 = OpVariable %15 Input +%115 = OpVariable %15 Input +%117 = OpVariable %21 Input +%120 = OpVariable %21 Input +%122 = OpVariable %27 Input +%125 = OpVariable %27 Input +%127 = OpVariable %33 Input +%130 = OpVariable %33 Input +%132 = OpVariable %39 Output +%133 = OpVariable %39 Output +%134 = OpVariable %42 Output +%135 = OpVariable %42 
Output +%136 = OpVariable %45 Output +%137 = OpVariable %45 Output +%138 = OpVariable %48 Output +%139 = OpVariable %48 Output +%142 = OpConstantNull %12 +%183 = OpVariable %15 Input +%186 = OpVariable %15 Input +%188 = OpVariable %21 Input +%191 = OpVariable %21 Input +%193 = OpVariable %27 Input +%196 = OpVariable %27 Input +%198 = OpVariable %33 Input +%201 = OpVariable %33 Input +%203 = OpVariable %39 Output +%204 = OpVariable %39 Output +%205 = OpVariable %42 Output +%206 = OpVariable %42 Output +%207 = OpVariable %45 Output +%208 = OpVariable %45 Output +%209 = OpVariable %48 Output +%210 = OpVariable %48 Output +%213 = OpConstantNull %12 +%215 = OpConstantNull %12 +%264 = OpVariable %15 Input +%267 = OpVariable %15 Input +%269 = OpVariable %21 Input +%272 = OpVariable %21 Input +%274 = OpVariable %27 Input +%277 = OpVariable %27 Input +%279 = OpVariable %33 Input +%282 = OpVariable %33 Input +%284 = OpVariable %39 Output +%286 = OpConstant %4 0.0 +%288 = OpConstantNull %12 +%50 = OpFunction %2 None %51 +%13 = OpLabel +%60 = OpVariable %61 Function %62 +%16 = OpLoad %5 %14 +%17 = OpFConvert %4 %16 +%19 = OpLoad %5 %18 +%22 = OpLoad %7 %20 +%23 = OpFConvert %6 %22 +%25 = OpLoad %7 %24 +%28 = OpLoad %9 %26 +%29 = OpFConvert %8 %28 +%31 = OpLoad %9 %30 +%34 = OpLoad %11 %32 +%35 = OpFConvert %10 %34 +%37 = OpLoad %11 %36 +OpBranch %63 +%63 = OpLabel +OpLine %3 15 5 +OpLine %3 15 25 +%65 = OpFAdd %4 %17 %52 +OpLine %3 15 5 +%68 = OpAccessChain %64 %60 %66 +OpStore %68 %65 +OpLine %3 16 5 +OpLine %3 16 25 +%70 = OpFAdd %5 %19 %53 +OpLine %3 16 5 +%72 = OpAccessChain %69 %60 %71 +OpStore %72 %70 +OpLine %3 17 5 +OpLine %3 17 23 +%74 = OpFAdd %6 %23 %54 +OpLine %3 17 5 +%76 = OpAccessChain %73 %60 %75 +OpStore %76 %74 +OpLine %3 18 5 +OpLine %3 18 34 +OpLine %3 18 23 +%78 = OpFAdd %7 %25 %55 +OpLine %3 18 5 +%80 = OpAccessChain %77 %60 %79 +OpStore %80 %78 +OpLine %3 19 5 +OpLine %3 19 23 +%82 = OpFAdd %8 %29 %56 +OpLine %3 19 5 +%84 = OpAccessChain %81 %60 %83 
+OpStore %84 %82 +OpLine %3 20 5 +OpLine %3 20 34 +OpLine %3 20 23 +%86 = OpFAdd %9 %31 %57 +OpLine %3 20 5 +%88 = OpAccessChain %85 %60 %87 +OpStore %88 %86 +OpLine %3 21 5 +OpLine %3 21 23 +%90 = OpFAdd %10 %35 %58 +OpLine %3 21 5 +%92 = OpAccessChain %89 %60 %91 +OpStore %92 %90 +OpLine %3 22 5 +OpLine %3 22 34 +OpLine %3 22 23 +%94 = OpFAdd %11 %37 %59 +OpLine %3 22 5 +%96 = OpAccessChain %93 %60 %95 +OpStore %96 %94 +OpLine %3 1 1 +%97 = OpLoad %12 %60 +%98 = OpCompositeExtract %4 %97 0 +%99 = OpFConvert %5 %98 +OpStore %38 %99 +%100 = OpCompositeExtract %5 %97 1 +OpStore %40 %100 +%101 = OpCompositeExtract %6 %97 2 +%102 = OpFConvert %7 %101 +OpStore %41 %102 +%103 = OpCompositeExtract %7 %97 3 +OpStore %43 %103 +%104 = OpCompositeExtract %8 %97 4 +%105 = OpFConvert %9 %104 +OpStore %44 %105 +%106 = OpCompositeExtract %9 %97 5 +OpStore %46 %106 +%107 = OpCompositeExtract %10 %97 6 +%108 = OpFConvert %11 %107 +OpStore %47 %108 +%109 = OpCompositeExtract %11 %97 7 +OpStore %49 %109 +OpReturn +OpFunctionEnd +%140 = OpFunction %2 None %51 +%110 = OpLabel +%141 = OpVariable %61 Function %142 +%113 = OpLoad %5 %112 +%114 = OpFConvert %4 %113 +%116 = OpLoad %5 %115 +%118 = OpLoad %7 %117 +%119 = OpFConvert %6 %118 +%121 = OpLoad %7 %120 +%123 = OpLoad %9 %122 +%124 = OpFConvert %8 %123 +%126 = OpLoad %9 %125 +%128 = OpLoad %11 %127 +%129 = OpFConvert %10 %128 +%131 = OpLoad %11 %130 +%111 = OpCompositeConstruct %12 %114 %116 %119 %121 %124 %126 %129 %131 +OpBranch %143 +%143 = OpLabel +OpLine %3 40 5 +%144 = OpCompositeExtract %4 %111 0 +OpLine %3 40 25 +%145 = OpFAdd %4 %144 %52 +OpLine %3 40 5 +%146 = OpAccessChain %64 %141 %66 +OpStore %146 %145 +OpLine %3 41 5 +%147 = OpCompositeExtract %5 %111 1 +OpLine %3 41 25 +%148 = OpFAdd %5 %147 %53 +OpLine %3 41 5 +%149 = OpAccessChain %69 %141 %71 +OpStore %149 %148 +OpLine %3 42 5 +%150 = OpCompositeExtract %6 %111 2 +OpLine %3 42 23 +%151 = OpFAdd %6 %150 %54 +OpLine %3 42 5 +%152 = OpAccessChain %73 %141 %75 +OpStore 
%152 %151 +OpLine %3 43 5 +%153 = OpCompositeExtract %7 %111 3 +OpLine %3 43 40 +OpLine %3 43 23 +%154 = OpFAdd %7 %153 %55 +OpLine %3 43 5 +%155 = OpAccessChain %77 %141 %79 +OpStore %155 %154 +OpLine %3 44 5 +%156 = OpCompositeExtract %8 %111 4 +OpLine %3 44 23 +%157 = OpFAdd %8 %156 %56 +OpLine %3 44 5 +%158 = OpAccessChain %81 %141 %83 +OpStore %158 %157 +OpLine %3 45 5 +%159 = OpCompositeExtract %9 %111 5 +OpLine %3 45 40 +OpLine %3 45 23 +%160 = OpFAdd %9 %159 %57 +OpLine %3 45 5 +%161 = OpAccessChain %85 %141 %87 +OpStore %161 %160 +OpLine %3 46 5 +%162 = OpCompositeExtract %10 %111 6 +OpLine %3 46 23 +%163 = OpFAdd %10 %162 %58 +OpLine %3 46 5 +%164 = OpAccessChain %89 %141 %91 +OpStore %164 %163 +OpLine %3 47 5 +%165 = OpCompositeExtract %11 %111 7 +OpLine %3 47 40 +OpLine %3 47 23 +%166 = OpFAdd %11 %165 %59 +OpLine %3 47 5 +%167 = OpAccessChain %93 %141 %95 +OpStore %167 %166 +OpLine %3 1 1 +%168 = OpLoad %12 %141 +%169 = OpCompositeExtract %4 %168 0 +%170 = OpFConvert %5 %169 +OpStore %132 %170 +%171 = OpCompositeExtract %5 %168 1 +OpStore %133 %171 +%172 = OpCompositeExtract %6 %168 2 +%173 = OpFConvert %7 %172 +OpStore %134 %173 +%174 = OpCompositeExtract %7 %168 3 +OpStore %135 %174 +%175 = OpCompositeExtract %8 %168 4 +%176 = OpFConvert %9 %175 +OpStore %136 %176 +%177 = OpCompositeExtract %9 %168 5 +OpStore %137 %177 +%178 = OpCompositeExtract %10 %168 6 +%179 = OpFConvert %11 %178 +OpStore %138 %179 +%180 = OpCompositeExtract %11 %168 7 +OpStore %139 %180 +OpReturn +OpFunctionEnd +%211 = OpFunction %2 None %51 +%181 = OpLabel +%212 = OpVariable %61 Function %213 +%214 = OpVariable %61 Function %215 +%184 = OpLoad %5 %183 +%185 = OpFConvert %4 %184 +%187 = OpLoad %5 %186 +%189 = OpLoad %7 %188 +%190 = OpFConvert %6 %189 +%192 = OpLoad %7 %191 +%194 = OpLoad %9 %193 +%195 = OpFConvert %8 %194 +%197 = OpLoad %9 %196 +%199 = OpLoad %11 %198 +%200 = OpFConvert %10 %199 +%202 = OpLoad %11 %201 +%182 = OpCompositeConstruct %12 %185 %187 %190 %192 %195 
%197 %200 %202 +OpBranch %216 +%216 = OpLabel +OpLine %3 53 5 +OpStore %212 %182 +OpLine %3 55 5 +%217 = OpAccessChain %64 %212 %66 +%218 = OpLoad %4 %217 +OpLine %3 55 25 +%219 = OpFAdd %4 %218 %52 +OpLine %3 55 5 +%220 = OpAccessChain %64 %214 %66 +OpStore %220 %219 +OpLine %3 56 5 +%221 = OpAccessChain %69 %212 %71 +%222 = OpLoad %5 %221 +OpLine %3 56 25 +%223 = OpFAdd %5 %222 %53 +OpLine %3 56 5 +%224 = OpAccessChain %69 %214 %71 +OpStore %224 %223 +OpLine %3 57 5 +%225 = OpAccessChain %73 %212 %75 +%226 = OpLoad %6 %225 +OpLine %3 57 23 +%227 = OpFAdd %6 %226 %54 +OpLine %3 57 5 +%228 = OpAccessChain %73 %214 %75 +OpStore %228 %227 +OpLine %3 58 5 +%229 = OpAccessChain %77 %212 %79 +%230 = OpLoad %7 %229 +OpLine %3 58 40 +OpLine %3 58 23 +%231 = OpFAdd %7 %230 %55 +OpLine %3 58 5 +%232 = OpAccessChain %77 %214 %79 +OpStore %232 %231 +OpLine %3 59 5 +%233 = OpAccessChain %81 %212 %83 +%234 = OpLoad %8 %233 +OpLine %3 59 23 +%235 = OpFAdd %8 %234 %56 +OpLine %3 59 5 +%236 = OpAccessChain %81 %214 %83 +OpStore %236 %235 +OpLine %3 60 5 +%237 = OpAccessChain %85 %212 %87 +%238 = OpLoad %9 %237 +OpLine %3 60 40 +OpLine %3 60 23 +%239 = OpFAdd %9 %238 %57 +OpLine %3 60 5 +%240 = OpAccessChain %85 %214 %87 +OpStore %240 %239 +OpLine %3 61 5 +%241 = OpAccessChain %89 %212 %91 +%242 = OpLoad %10 %241 +OpLine %3 61 23 +%243 = OpFAdd %10 %242 %58 +OpLine %3 61 5 +%244 = OpAccessChain %89 %214 %91 +OpStore %244 %243 +OpLine %3 62 5 +%245 = OpAccessChain %93 %212 %95 +%246 = OpLoad %11 %245 +OpLine %3 62 40 +OpLine %3 62 23 +%247 = OpFAdd %11 %246 %59 +OpLine %3 62 5 +%248 = OpAccessChain %93 %214 %95 +OpStore %248 %247 +OpLine %3 1 1 +%249 = OpLoad %12 %214 +%250 = OpCompositeExtract %4 %249 0 +%251 = OpFConvert %5 %250 +OpStore %203 %251 +%252 = OpCompositeExtract %5 %249 1 +OpStore %204 %252 +%253 = OpCompositeExtract %6 %249 2 +%254 = OpFConvert %7 %253 +OpStore %205 %254 +%255 = OpCompositeExtract %7 %249 3 +OpStore %206 %255 +%256 = OpCompositeExtract %8 %249 4 +%257 
= OpFConvert %9 %256 +OpStore %207 %257 +%258 = OpCompositeExtract %9 %249 5 +OpStore %208 %258 +%259 = OpCompositeExtract %10 %249 6 +%260 = OpFConvert %11 %259 +OpStore %209 %260 +%261 = OpCompositeExtract %11 %249 7 +OpStore %210 %261 +OpReturn +OpFunctionEnd +%285 = OpFunction %2 None %51 +%262 = OpLabel +%287 = OpVariable %61 Function %288 +%265 = OpLoad %5 %264 +%266 = OpFConvert %4 %265 +%268 = OpLoad %5 %267 +%270 = OpLoad %7 %269 +%271 = OpFConvert %6 %270 +%273 = OpLoad %7 %272 +%275 = OpLoad %9 %274 +%276 = OpFConvert %8 %275 +%278 = OpLoad %9 %277 +%280 = OpLoad %11 %279 +%281 = OpFConvert %10 %280 +%283 = OpLoad %11 %282 +%263 = OpCompositeConstruct %12 %266 %268 %271 %273 %276 %278 %281 %283 +OpBranch %289 +%289 = OpLabel +OpLine %3 68 5 +OpStore %287 %263 +OpLine %3 69 5 +OpLine %3 69 5 +%290 = OpAccessChain %64 %287 %66 +OpStore %290 %286 +OpLine %3 70 12 +%291 = OpAccessChain %64 %287 %66 +%292 = OpLoad %4 %291 +%293 = OpFConvert %5 %292 +OpStore %284 %293 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index b429f2314dc..6667c71561e 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -226,7 +226,7 @@ impl PhysicalDeviceFeatures { /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions fn from_extensions_and_requested_features( phd_capabilities: &PhysicalDeviceProperties, - _phd_features: &PhysicalDeviceFeatures, + phd_features: &PhysicalDeviceFeatures, enabled_extensions: &[&'static CStr], requested_features: wgt::Features, downlevel_flags: wgt::DownlevelFlags, @@ -396,10 +396,17 @@ impl PhysicalDeviceFeatures { _ => None, }, _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) { + // Check if the device actually supports storage_input_output16 + let storage_input_output16_supported = phd_features + ._16bit_storage + .as_ref() + .map(|features| features.storage_input_output16 != 0) + 
.unwrap_or(false); + Some( vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) - .storage_input_output16(true) + .storage_input_output16(storage_input_output16_supported) .uniform_and_storage_buffer16_bit_access(true), ) } else { @@ -736,12 +743,12 @@ impl PhysicalDeviceFeatures { if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage) { + // Note storage_input_output16 is not required, we polyfill f16 I/O using f32 types when this capability is not available features.set( F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 - && bit16.uniform_and_storage_buffer16_bit_access != 0 - && bit16.storage_input_output16 != 0, + && bit16.uniform_and_storage_buffer16_bit_access != 0, ); } @@ -2115,6 +2122,15 @@ impl super::Adapter { spv::ZeroInitializeWorkgroupMemoryMode::Polyfill }, force_loop_bounding: true, + use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) && { + // Check if the device actually supports storage_input_output16 + let phd_features = self.physical_device_features(enabled_extensions, features); + phd_features + ._16bit_storage + .as_ref() + .map(|storage_features| storage_features.storage_input_output16 != 0) + .unwrap_or(false) + }, // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None, From 5694706a01d7d4ac834d0c18dd48cb893590ecad Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sat, 5 Jul 2025 08:59:34 -0400 Subject: [PATCH 04/20] Cleanup --- naga/src/back/spv/polyfill.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/naga/src/back/spv/polyfill.rs b/naga/src/back/spv/polyfill.rs index 9c50444620d..abcdb2092f2 100644 --- a/naga/src/back/spv/polyfill.rs +++ b/naga/src/back/spv/polyfill.rs @@ -9,7 +9,7 @@ It works by: */ use crate::back::spv::{Instruction, LocalType, NumericType, Word}; -use std::vec::Vec; +use alloc::vec::Vec; /// 
Manages f16 I/O polyfill state and operations. #[derive(Default)] @@ -94,10 +94,6 @@ impl F16IoPolyfill { _ => None, } } - - pub fn clear(&mut self) { - self.variable_map.clear(); - } } impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill { From 1fc98ffccf0a527bce6db239bd66c8f680f337f2 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sat, 5 Jul 2025 09:23:34 -0400 Subject: [PATCH 05/20] Rename file --- naga/src/back/spv/block.rs | 4 ++-- naga/src/back/spv/{polyfill.rs => f16_polyfill.rs} | 0 naga/src/back/spv/mod.rs | 4 ++-- naga/src/back/spv/writer.rs | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) rename naga/src/back/spv/{polyfill.rs => f16_polyfill.rs} (100%) diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 5de6e91c616..5114b348546 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -239,7 +239,7 @@ impl Writer { if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) { let converted = self.id_gen.next(); - super::polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( + super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( member_value_id, f32_ty, converted, @@ -2332,7 +2332,7 @@ impl BlockContext<'_> { .body .push(Instruction::load(f32_ty, id, pointer_id, None)); let converted = self.gen_id(); - super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( id, result_type_id, converted, diff --git a/naga/src/back/spv/polyfill.rs b/naga/src/back/spv/f16_polyfill.rs similarity index 100% rename from naga/src/back/spv/polyfill.rs rename to naga/src/back/spv/f16_polyfill.rs diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 5a37044fe47..8ac85978fbc 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -10,7 +10,7 @@ mod image; mod index; mod instructions; mod layout; -mod polyfill; +mod f16_polyfill; mod ray; mod recyclable; mod selection; @@ -775,7 +775,7 @@ 
pub struct Writer { /// F16 I/O polyfill manager for handling f16 input/output variables /// when StorageInputOutput16 capability is not available. - io_f16_polyfills: polyfill::F16IoPolyfill, + io_f16_polyfills: f16_polyfill::F16IoPolyfill, } bitflags::bitflags! { diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index b9c530fbf8f..510f841dc61 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -93,7 +93,7 @@ impl Writer { temp_list: Vec::new(), ray_get_committed_intersection_function: None, ray_get_candidate_intersection_function: None, - io_f16_polyfills: super::polyfill::F16IoPolyfill::new( + io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new( options.use_storage_input_output_16, ), }) @@ -739,7 +739,7 @@ impl Writer { .body .push(Instruction::load(f32_ty, id, varying_id, None)); let converted = self.id_gen.next(); - super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( id, argument_type_id, converted, @@ -786,7 +786,7 @@ impl Writer { .body .push(Instruction::load(f32_ty, id, varying_id, None)); let converted = self.id_gen.next(); - super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( id, type_id, converted, @@ -1953,7 +1953,7 @@ impl Writer { let needs_polyfill = self.needs_f16_polyfill(ty_inner); let pointer_type_id = if needs_polyfill { - let f32_value_local = super::polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) + let f32_value_local = super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) .expect("needs_polyfill returned true but create_polyfill_type returned None"); let f32_type_id = self.get_localtype_id(f32_value_local); From 5201714279937bb129b28d131e4d97ed3e5701ad Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sat, 5 Jul 2025 09:24:47 -0400 Subject: [PATCH 06/20] fmt --- naga/src/back/spv/mod.rs | 2 +- naga/src/back/spv/writer.rs | 5 
+++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 8ac85978fbc..87d201ef212 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -5,12 +5,12 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation). */ mod block; +mod f16_polyfill; mod helpers; mod image; mod index; mod instructions; mod layout; -mod f16_polyfill; mod ray; mod recyclable; mod selection; diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 510f841dc61..53ac4051853 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -1953,8 +1953,9 @@ impl Writer { let needs_polyfill = self.needs_f16_polyfill(ty_inner); let pointer_type_id = if needs_polyfill { - let f32_value_local = super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) - .expect("needs_polyfill returned true but create_polyfill_type returned None"); + let f32_value_local = + super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) + .expect("needs_polyfill returned true but create_polyfill_type returned None"); let f32_type_id = self.get_localtype_id(f32_value_local); let ptr_id = self.get_pointer_type_id(f32_type_id, class); From 44c88df7cf83fa0ccd1acdbbb1c9696c077528ec Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sat, 5 Jul 2025 14:22:41 -0400 Subject: [PATCH 07/20] Changelog entry --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 946473f8127..07a87b7c3ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ Bottom level categories: - Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643). 
- Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683). - Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658) +- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884) #### General From 2151560851632082426b99c1d4d9a574b53e1614 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sat, 5 Jul 2025 14:22:41 -0400 Subject: [PATCH 08/20] [naga spv-out] Add f16 io polyfill --- CHANGELOG.md | 1 + naga/src/back/spv/block.rs | 30 +- naga/src/back/spv/f16_polyfill.rs | 104 +++ naga/src/back/spv/mod.rs | 11 + naga/src/back/spv/writer.rs | 93 ++- naga/tests/in/wgsl/f16-native.toml | 13 + naga/tests/in/wgsl/f16-native.wgsl | 71 ++ naga/tests/in/wgsl/f16-polyfill.toml | 13 + naga/tests/in/wgsl/f16-polyfill.wgsl | 71 ++ naga/tests/naga/snapshots.rs | 21 +- naga/tests/naga/spirv_capabilities.rs | 135 ++++ naga/tests/out/spv/wgsl-f16-native.spvasm | 655 +++++++++++++++++++ naga/tests/out/spv/wgsl-f16-polyfill.spvasm | 675 ++++++++++++++++++++ wgpu-hal/src/vulkan/adapter.rs | 24 +- 14 files changed, 1898 insertions(+), 19 deletions(-) create mode 100644 naga/src/back/spv/f16_polyfill.rs create mode 100644 naga/tests/in/wgsl/f16-native.toml create mode 100644 naga/tests/in/wgsl/f16-native.wgsl create mode 100644 naga/tests/in/wgsl/f16-polyfill.toml create mode 100644 naga/tests/in/wgsl/f16-polyfill.wgsl create mode 100644 naga/tests/out/spv/wgsl-f16-native.spvasm create mode 100644 naga/tests/out/spv/wgsl-f16-polyfill.spvasm diff --git a/CHANGELOG.md b/CHANGELOG.md index c27699365fb..febf490551b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -130,6 +130,7 @@ By @Vecvec in [#7829](https://github.com/gfx-rs/wgpu/pull/7829). 
- Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643). - Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683). - Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658) +- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884) ### General diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 7ec659e1d90..5114b348546 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -237,7 +237,18 @@ impl Writer { } }; - body.push(Instruction::store(res_member.id, member_value_id, None)); + if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) { + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( + member_value_id, + f32_ty, + converted, + body, + ); + body.push(Instruction::store(res_member.id, converted, None)); + } else { + body.push(Instruction::store(res_member.id, member_value_id, None)); + } match res_member.built_in { Some(crate::BuiltIn::Position { .. }) @@ -2313,6 +2324,23 @@ impl BlockContext<'_> { match self.write_access_chain(pointer, block, access_type_adjustment)? 
{ ExpressionPointer::Ready { pointer_id } => { let id = self.gen_id(); + + if let Some((f32_ty, _)) = + self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) + { + block + .body + .push(Instruction::load(f32_ty, id, pointer_id, None)); + let converted = self.gen_id(); + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + id, + result_type_id, + converted, + &mut block.body, + ); + return Ok(converted); + } + let atomic_space = match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) { crate::TypeInner::Pointer { base, space } => { diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs new file mode 100644 index 00000000000..abcdb2092f2 --- /dev/null +++ b/naga/src/back/spv/f16_polyfill.rs @@ -0,0 +1,104 @@ +/*! +This module provides functionality to polyfill f16 input/output variables +when the StorageInputOutput16 capability is not available or disabled. + +It works by: +1. Declaring f16 I/O variables as f32 in SPIR-V +2. Converting between f16 and f32 at runtime using OpFConvert +3. Maintaining mappings to track which variables need conversion +*/ + +use crate::back::spv::{Instruction, LocalType, NumericType, Word}; +use alloc::vec::Vec; + +/// Manages f16 I/O polyfill state and operations. +#[derive(Default)] +pub(super) struct F16IoPolyfill { + use_native: bool, + variable_map: crate::FastHashMap, +} + +impl F16IoPolyfill { + pub fn new(use_storage_input_output_16: bool) -> Self { + Self { + use_native: use_storage_input_output_16, + variable_map: crate::FastHashMap::default(), + } + } + + pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool { + use crate::{ScalarKind as Sk, TypeInner}; + + !self.use_native + && match *ty_inner { + TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true, + TypeInner::Vector { scalar, ..
} + if scalar.kind == Sk::Float && scalar.width == 2 => + { + true + } + _ => false, + } + } + + pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) { + self.variable_map + .insert(variable_id, (f32_type_id, f16_type_id)); + } + + pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> { + self.variable_map.get(&variable_id).copied() + } + + pub fn emit_f16_to_f32_conversion( + f16_value_id: Word, + f32_type_id: Word, + converted_id: Word, + body: &mut Vec, + ) { + body.push(Instruction::unary( + spirv::Op::FConvert, + f32_type_id, + converted_id, + f16_value_id, + )); + } + + pub fn emit_f32_to_f16_conversion( + f32_value_id: Word, + f16_type_id: Word, + converted_id: Word, + body: &mut Vec, + ) { + body.push(Instruction::unary( + spirv::Op::FConvert, + f16_type_id, + converted_id, + f32_value_id, + )); + } + + pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option { + use crate::{ScalarKind as Sk, TypeInner}; + + match *ty_inner { + TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => { + Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32))) + } + TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => { + Some(LocalType::Numeric(NumericType::Vector { + size, + scalar: crate::Scalar::F32, + })) + } + _ => None, + } + } +} + +impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill { + fn recycle(mut self) -> Self { + self.variable_map = self.variable_map.recycle(); + self + } +} diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 2dcd95957d7..87d201ef212 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation). 
*/ mod block; +mod f16_polyfill; mod helpers; mod image; mod index; @@ -744,6 +745,7 @@ pub struct Writer { bounds_check_policies: BoundsCheckPolicies, zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode, force_loop_bounding: bool, + use_storage_input_output_16: bool, void_type: Word, //TODO: convert most of these into vectors, addressable by handle indices lookup_type: crate::FastHashMap, @@ -770,6 +772,10 @@ pub struct Writer { ray_get_committed_intersection_function: Option, ray_get_candidate_intersection_function: Option, + + /// F16 I/O polyfill manager for handling f16 input/output variables + /// when StorageInputOutput16 capability is not available. + io_f16_polyfills: f16_polyfill::F16IoPolyfill, } bitflags::bitflags! { @@ -852,6 +858,10 @@ pub struct Options<'a> { /// to think the number of iterations is bounded. pub force_loop_bounding: bool, + /// Whether to use the StorageInputOutput16 capability for f16 shader I/O. + /// When false, f16 I/O is polyfilled using f32 types with conversions. 
+ pub use_storage_input_output_16: bool, + pub debug_info: Option>, } @@ -871,6 +881,7 @@ impl Default for Options<'_> { bounds_check_policies: BoundsCheckPolicies::default(), zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill, force_loop_bounding: true, + use_storage_input_output_16: true, debug_info: None, } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index b61747c8326..53ac4051853 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -78,6 +78,7 @@ impl Writer { bounds_check_policies: options.bounds_check_policies, zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory, force_loop_bounding: options.force_loop_bounding, + use_storage_input_output_16: options.use_storage_input_output_16, void_type, lookup_type: crate::FastHashMap::default(), lookup_function: crate::FastHashMap::default(), @@ -92,6 +93,9 @@ impl Writer { temp_list: Vec::new(), ray_get_committed_intersection_function: None, ray_get_candidate_intersection_function: None, + io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new( + options.use_storage_input_output_16, + ), }) } @@ -125,6 +129,7 @@ impl Writer { bounds_check_policies: self.bounds_check_policies, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, force_loop_bounding: self.force_loop_bounding, + use_storage_input_output_16: self.use_storage_input_output_16, capabilities_available: take(&mut self.capabilities_available), binding_map: take(&mut self.binding_map), @@ -151,6 +156,7 @@ impl Writer { temp_list: take(&mut self.temp_list).recycle(), ray_get_candidate_intersection_function: None, ray_get_committed_intersection_function: None, + io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(), }; *self = fresh; @@ -726,10 +732,28 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let id = self.id_gen.next(); - prelude - .body - .push(Instruction::load(argument_type_id, id, varying_id, None)); + 
let mut id = self.id_gen.next(); + + if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) { + prelude + .body + .push(Instruction::load(f32_ty, id, varying_id, None)); + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + id, + argument_type_id, + converted, + &mut prelude.body, + ); + id = converted; + } else { + prelude.body.push(Instruction::load( + argument_type_id, + id, + varying_id, + None, + )); + } if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { local_invocation_id = Some(id); @@ -755,10 +779,26 @@ impl Writer { )?; iface.varying_ids.push(varying_id); let id = self.id_gen.next(); - prelude - .body - .push(Instruction::load(type_id, id, varying_id, None)); - constituent_ids.push(id); + if let Some((f32_ty, _)) = + self.io_f16_polyfills.get_polyfill_info(varying_id) + { + prelude + .body + .push(Instruction::load(f32_ty, id, varying_id, None)); + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + id, + type_id, + converted, + &mut prelude.body, + ); + constituent_ids.push(converted); + } else { + prelude + .body + .push(Instruction::load(type_id, id, varying_id, None)); + constituent_ids.push(id); + } if binding == &crate::Binding::BuiltIn(crate::BuiltIn::GlobalInvocationId) { local_invocation_id = Some(id); @@ -1220,8 +1260,10 @@ impl Writer { .insert(spirv::Capability::StorageBuffer16BitAccess); self.capabilities_used .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess); - self.capabilities_used - .insert(spirv::Capability::StorageInputOutput16); + if self.use_storage_input_output_16 { + self.capabilities_used + .insert(spirv::Capability::StorageInputOutput16); + } } Instruction::type_float(id, bits) } @@ -1904,8 +1946,28 @@ impl Writer { ty: Handle, binding: &crate::Binding, ) -> Result { + use crate::TypeInner; + let id = self.id_gen.next(); - let pointer_type_id = 
self.get_handle_pointer_type_id(ty, class); + let ty_inner = &ir_module.types[ty].inner; + let needs_polyfill = self.needs_f16_polyfill(ty_inner); + + let pointer_type_id = if needs_polyfill { + let f32_value_local = + super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner) + .expect("needs_polyfill returned true but create_polyfill_type returned None"); + + let f32_type_id = self.get_localtype_id(f32_value_local); + let ptr_id = self.get_pointer_type_id(f32_type_id, class); + let f16_type_id = self.get_handle_type_id(ty); + self.io_f16_polyfills + .register_variable(id, f32_type_id, f16_type_id); + + ptr_id + } else { + self.get_handle_pointer_type_id(ty, class) + }; + Instruction::variable(pointer_type_id, id, class, None) .to_words(&mut self.logical_layout.declarations); @@ -2088,8 +2150,9 @@ impl Writer { // > shader, must be decorated Flat if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment { let is_flat = match ir_module.types[ty].inner { - crate::TypeInner::Scalar(scalar) - | crate::TypeInner::Vector { scalar, .. } => match scalar.kind { + TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. 
} => match scalar + .kind + { Sk::Uint | Sk::Sint | Sk::Bool => true, Sk::Float => false, Sk::AbstractInt | Sk::AbstractFloat => { @@ -2584,6 +2647,10 @@ impl Writer { self.decorate(id, spirv::Decoration::NonUniform, &[]); Ok(()) } + + pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool { + self.io_f16_polyfills.needs_polyfill(ty_inner) + } } #[test] diff --git a/naga/tests/in/wgsl/f16-native.toml b/naga/tests/in/wgsl/f16-native.toml new file mode 100644 index 00000000000..529d34f80da --- /dev/null +++ b/naga/tests/in/wgsl/f16-native.toml @@ -0,0 +1,13 @@ +targets = "SPIRV" +god_mode = true + +[spv] +debug = true +version = [1, 1] +use_storage_input_output_16 = true +capabilities = ["Float16"] + +[bounds_check_policies] +index = "ReadZeroSkipWrite" +buffer = "ReadZeroSkipWrite" +image = "ReadZeroSkipWrite" diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl new file mode 100644 index 00000000000..2dea0baaa29 --- /dev/null +++ b/naga/tests/in/wgsl/f16-native.wgsl @@ -0,0 +1,71 @@ +enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: 
vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} diff --git a/naga/tests/in/wgsl/f16-polyfill.toml b/naga/tests/in/wgsl/f16-polyfill.toml new file mode 100644 index 00000000000..96160063e05 --- /dev/null +++ b/naga/tests/in/wgsl/f16-polyfill.toml @@ -0,0 +1,13 @@ +targets = "SPIRV" +god_mode = true + +[spv] +debug = true +version = [1, 1] +use_storage_input_output_16 = false +capabilities = ["Float16"] + +[bounds_check_policies] +index = "ReadZeroSkipWrite" +buffer = "ReadZeroSkipWrite" +image = "ReadZeroSkipWrite" diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl new file mode 100644 index 00000000000..2dea0baaa29 --- /dev/null +++ b/naga/tests/in/wgsl/f16-polyfill.wgsl @@ -0,0 +1,71 @@ +enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: 
vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} diff 
--git a/naga/tests/naga/snapshots.rs b/naga/tests/naga/snapshots.rs index 32e2f5e0285..b00ef579203 100644 --- a/naga/tests/naga/snapshots.rs +++ b/naga/tests/naga/snapshots.rs @@ -91,7 +91,7 @@ struct SpirvInParameters { adjust_coordinate_space: bool, } -#[derive(Default, serde::Deserialize)] +#[derive(serde::Deserialize)] #[serde(default)] struct SpirvOutParameters { version: SpvOutVersion, @@ -101,11 +101,29 @@ struct SpirvOutParameters { force_point_size: bool, clamp_frag_depth: bool, separate_entry_points: bool, + use_storage_input_output_16: bool, #[cfg(all(feature = "deserialize", spv_out))] #[serde(deserialize_with = "deserialize_binding_map")] binding_map: naga::back::spv::BindingMap, } +impl Default for SpirvOutParameters { + fn default() -> Self { + Self { + version: SpvOutVersion::default(), + capabilities: naga::FastHashSet::default(), + debug: false, + adjust_coordinate_space: false, + force_point_size: false, + clamp_frag_depth: false, + separate_entry_points: false, + use_storage_input_output_16: true, + #[cfg(all(feature = "deserialize", spv_out))] + binding_map: naga::back::spv::BindingMap::default(), + } + } +} + #[derive(Default, serde::Deserialize)] #[serde(default)] struct WgslOutParameters { @@ -617,6 +635,7 @@ fn write_output_spv( binding_map: params.binding_map.clone(), zero_initialize_workgroup_memory: spv::ZeroInitializeWorkgroupMemoryMode::Polyfill, force_loop_bounding: true, + use_storage_input_output_16: params.use_storage_input_output_16, debug_info, }; diff --git a/naga/tests/naga/spirv_capabilities.rs b/naga/tests/naga/spirv_capabilities.rs index 2d46e37f72d..aa99298273d 100644 --- a/naga/tests/naga/spirv_capabilities.rs +++ b/naga/tests/naga/spirv_capabilities.rs @@ -6,6 +6,9 @@ Test SPIR-V backend capability checks. 
use spirv::Capability as Ca; +#[cfg(spv_out)] +use rspirv::binary::Disassemble; + fn capabilities_used(source: &str) -> naga::FastIndexSet { use naga::back::spv; use naga::valid; @@ -213,3 +216,135 @@ fn int64() { fn float16() { require(&[Ca::Float16], "enable f16; fn f(x: f16) { }"); } + +#[test] +fn f16_io_capabilities() { + let source = r#" + enable f16; + + struct VertexOutput { + @location(0) color: vec3, + } + + @fragment + fn main(input: VertexOutput) -> @location(0) vec4 { + return vec4(input.color, f16(1.0)); + } + "#; + + use naga::back::spv; + use naga::valid; + + let module = naga::front::wgsl::parse_str(source).unwrap(); + let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()) + .validate(&module) + .unwrap(); + + // Test native path: use_storage_input_output_16 = true + let options_native = spv::Options { + use_storage_input_output_16: true, + ..Default::default() + }; + + let mut words_native = vec![]; + let mut writer_native = spv::Writer::new(&options_native).unwrap(); + writer_native + .write(&module, &info, None, &None, &mut words_native) + .unwrap(); + let caps_native = writer_native.get_capabilities_used(); + + // Should include StorageInputOutput16 for native f16 I/O + assert!(caps_native.contains(&Ca::StorageInputOutput16)); + + // Test polyfill path: use_storage_input_output_16 = false + let options_polyfill = spv::Options { + use_storage_input_output_16: false, + ..Default::default() + }; + + let mut words_polyfill = vec![]; + let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap(); + writer_polyfill + .write(&module, &info, None, &None, &mut words_polyfill) + .unwrap(); + let caps_polyfill = writer_polyfill.get_capabilities_used(); + + // Should not include StorageInputOutput16 when polyfilled + assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); + + // But should still include the basic f16 capabilities + assert!(caps_polyfill.contains(&Ca::Float16)); +} + +#[cfg(spv_out)] 
+#[test] +fn f16_io_polyfill_codegen() { + let source = r#" + enable f16; + + struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + } + + @fragment + fn main(input: F16IO) -> F16IO { + var output = input; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.vec2_f16.x = input.vec2_f16.y; + return output; + } + "#; + + use naga::{back::spv, valid}; + + let module = naga::front::wgsl::parse_str(source).unwrap(); + let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all()) + .validate(&module) + .unwrap(); + + // Test Native Path + let options_native = spv::Options { + use_storage_input_output_16: true, + ..Default::default() + }; + let mut words_native = vec![]; + let mut writer_native = spv::Writer::new(&options_native).unwrap(); + writer_native + .write(&module, &info, None, &None, &mut words_native) + .unwrap(); + let caps_native = writer_native.get_capabilities_used(); + let dis_native = rspirv::dr::load_words(words_native).unwrap().disassemble(); + + // Native path must request the capability and must NOT have conversions. + assert!(caps_native.contains(&Ca::StorageInputOutput16)); + assert!(!dis_native.contains("OpFConvert")); + + // Test Polyfill Path + let options_polyfill = spv::Options { + use_storage_input_output_16: false, + ..Default::default() + }; + let mut words_polyfill = vec![]; + let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap(); + writer_polyfill + .write(&module, &info, None, &None, &mut words_polyfill) + .unwrap(); + let caps_polyfill = writer_polyfill.get_capabilities_used(); + let dis_polyfill = rspirv::dr::load_words(words_polyfill) + .unwrap() + .disassemble(); + + // Polyfill path must NOT request the capability and must have conversions. 
+ assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); + assert!(dis_polyfill.contains("OpFConvert")); + + // Should have 2 input conversions, and 2 output conversions + let fconvert_count = dis_polyfill.matches("OpFConvert").count(); + assert_eq!( + fconvert_count, 4, + "Expected 4 OpFConvert instructions for polyfilled I/O" + ); +} diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm new file mode 100644 index 00000000000..78f1b0d9b58 --- /dev/null +++ b/naga/tests/out/spv/wgsl-f16-native.spvasm @@ -0,0 +1,655 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 273 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpCapability StorageInputOutput16 +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40 %42 %44 %46 %48 %50 %52 +OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135 +OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198 +OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254 %256 %258 %260 %262 %264 +OpExecutionMode %54 OriginUpperLeft +OpExecutionMode %136 OriginUpperLeft +OpExecutionMode %199 OriginUpperLeft +OpExecutionMode %265 OriginUpperLeft +%3 = OpString "f16-native.wgsl" +OpSource Unknown 0 %3 "enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 
+ vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} +" +OpMemberName %12 0 "scalar_f16" +OpMemberName %12 1 "scalar_f32" +OpMemberName %12 2 "vec2_f16" +OpMemberName %12 3 "vec2_f32" +OpMemberName %12 4 "vec3_f16" +OpMemberName %12 5 "vec3_f32" +OpMemberName %12 6 "vec4_f16" +OpMemberName %12 7 "vec4_f32" +OpName %12 "F16IO" +OpName %14 "scalar_f16" +OpName %17 
"scalar_f32" +OpName %20 "vec2_f16" +OpName %23 "vec2_f32" +OpName %26 "vec3_f16" +OpName %29 "vec3_f32" +OpName %32 "vec4_f16" +OpName %35 "vec4_f32" +OpName %38 "scalar_f16" +OpName %40 "scalar_f32" +OpName %42 "vec2_f16" +OpName %44 "vec2_f32" +OpName %46 "vec3_f16" +OpName %48 "vec3_f32" +OpName %50 "vec4_f16" +OpName %52 "vec4_f32" +OpName %54 "test_direct" +OpName %64 "output" +OpName %112 "scalar_f16" +OpName %114 "scalar_f32" +OpName %116 "vec2_f16" +OpName %118 "vec2_f32" +OpName %120 "vec3_f16" +OpName %122 "vec3_f32" +OpName %124 "vec4_f16" +OpName %126 "vec4_f32" +OpName %128 "scalar_f16" +OpName %129 "scalar_f32" +OpName %130 "vec2_f16" +OpName %131 "vec2_f32" +OpName %132 "vec3_f16" +OpName %133 "vec3_f32" +OpName %134 "vec4_f16" +OpName %135 "vec4_f32" +OpName %136 "test_struct" +OpName %137 "output" +OpName %175 "scalar_f16" +OpName %177 "scalar_f32" +OpName %179 "vec2_f16" +OpName %181 "vec2_f32" +OpName %183 "vec3_f16" +OpName %185 "vec3_f32" +OpName %187 "vec4_f16" +OpName %189 "vec4_f32" +OpName %191 "scalar_f16" +OpName %192 "scalar_f32" +OpName %193 "vec2_f16" +OpName %194 "vec2_f32" +OpName %195 "vec3_f16" +OpName %196 "vec3_f32" +OpName %197 "vec4_f16" +OpName %198 "vec4_f32" +OpName %199 "test_copy_input" +OpName %200 "input" +OpName %202 "output" +OpName %248 "scalar_f16" +OpName %250 "scalar_f32" +OpName %252 "vec2_f16" +OpName %254 "vec2_f32" +OpName %256 "vec3_f16" +OpName %258 "vec3_f32" +OpName %260 "vec4_f16" +OpName %262 "vec4_f32" +OpName %265 "test_return_partial" +OpName %267 "input" +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 16 +OpMemberDecorate %12 4 Offset 24 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %12 6 Offset 48 +OpMemberDecorate %12 7 Offset 64 +OpDecorate %14 Location 0 +OpDecorate %17 Location 1 +OpDecorate %20 Location 2 +OpDecorate %23 Location 3 +OpDecorate %26 Location 4 +OpDecorate %29 Location 5 +OpDecorate %32 
Location 6 +OpDecorate %35 Location 7 +OpDecorate %38 Location 0 +OpDecorate %40 Location 1 +OpDecorate %42 Location 2 +OpDecorate %44 Location 3 +OpDecorate %46 Location 4 +OpDecorate %48 Location 5 +OpDecorate %50 Location 6 +OpDecorate %52 Location 7 +OpDecorate %112 Location 0 +OpDecorate %114 Location 1 +OpDecorate %116 Location 2 +OpDecorate %118 Location 3 +OpDecorate %120 Location 4 +OpDecorate %122 Location 5 +OpDecorate %124 Location 6 +OpDecorate %126 Location 7 +OpDecorate %128 Location 0 +OpDecorate %129 Location 1 +OpDecorate %130 Location 2 +OpDecorate %131 Location 3 +OpDecorate %132 Location 4 +OpDecorate %133 Location 5 +OpDecorate %134 Location 6 +OpDecorate %135 Location 7 +OpDecorate %175 Location 0 +OpDecorate %177 Location 1 +OpDecorate %179 Location 2 +OpDecorate %181 Location 3 +OpDecorate %183 Location 4 +OpDecorate %185 Location 5 +OpDecorate %187 Location 6 +OpDecorate %189 Location 7 +OpDecorate %191 Location 0 +OpDecorate %192 Location 1 +OpDecorate %193 Location 2 +OpDecorate %194 Location 3 +OpDecorate %195 Location 4 +OpDecorate %196 Location 5 +OpDecorate %197 Location 6 +OpDecorate %198 Location 7 +OpDecorate %248 Location 0 +OpDecorate %250 Location 1 +OpDecorate %252 Location 2 +OpDecorate %254 Location 3 +OpDecorate %256 Location 4 +OpDecorate %258 Location 5 +OpDecorate %260 Location 6 +OpDecorate %262 Location 7 +OpDecorate %264 Location 0 +%2 = OpTypeVoid +%4 = OpTypeFloat 16 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %4 2 +%7 = OpTypeVector %5 2 +%8 = OpTypeVector %4 3 +%9 = OpTypeVector %5 3 +%10 = OpTypeVector %4 4 +%11 = OpTypeVector %5 4 +%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11 +%15 = OpTypePointer Input %4 +%14 = OpVariable %15 Input +%18 = OpTypePointer Input %5 +%17 = OpVariable %18 Input +%21 = OpTypePointer Input %6 +%20 = OpVariable %21 Input +%24 = OpTypePointer Input %7 +%23 = OpVariable %24 Input +%27 = OpTypePointer Input %8 +%26 = OpVariable %27 Input +%30 = OpTypePointer Input %9 +%29 = OpVariable %30 
Input +%33 = OpTypePointer Input %10 +%32 = OpVariable %33 Input +%36 = OpTypePointer Input %11 +%35 = OpVariable %36 Input +%39 = OpTypePointer Output %4 +%38 = OpVariable %39 Output +%41 = OpTypePointer Output %5 +%40 = OpVariable %41 Output +%43 = OpTypePointer Output %6 +%42 = OpVariable %43 Output +%45 = OpTypePointer Output %7 +%44 = OpVariable %45 Output +%47 = OpTypePointer Output %8 +%46 = OpVariable %47 Output +%49 = OpTypePointer Output %9 +%48 = OpVariable %49 Output +%51 = OpTypePointer Output %10 +%50 = OpVariable %51 Output +%53 = OpTypePointer Output %11 +%52 = OpVariable %53 Output +%55 = OpTypeFunction %2 +%56 = OpConstant %4 0.000000000000000000000000000000000000000021524 +%57 = OpConstant %5 1 +%58 = OpConstantComposite %6 %56 %56 +%59 = OpConstantComposite %7 %57 %57 +%60 = OpConstantComposite %8 %56 %56 %56 +%61 = OpConstantComposite %9 %57 %57 %57 +%62 = OpConstantComposite %10 %56 %56 %56 %56 +%63 = OpConstantComposite %11 %57 %57 %57 %57 +%65 = OpTypePointer Function %12 +%66 = OpConstantNull %12 +%68 = OpTypePointer Function %4 +%71 = OpTypeInt 32 0 +%70 = OpConstant %71 0 +%73 = OpTypePointer Function %5 +%75 = OpConstant %71 1 +%77 = OpTypePointer Function %6 +%79 = OpConstant %71 2 +%81 = OpTypePointer Function %7 +%83 = OpConstant %71 3 +%85 = OpTypePointer Function %8 +%87 = OpConstant %71 4 +%89 = OpTypePointer Function %9 +%91 = OpConstant %71 5 +%93 = OpTypePointer Function %10 +%95 = OpConstant %71 6 +%97 = OpTypePointer Function %11 +%99 = OpConstant %71 7 +%112 = OpVariable %15 Input +%114 = OpVariable %18 Input +%116 = OpVariable %21 Input +%118 = OpVariable %24 Input +%120 = OpVariable %27 Input +%122 = OpVariable %30 Input +%124 = OpVariable %33 Input +%126 = OpVariable %36 Input +%128 = OpVariable %39 Output +%129 = OpVariable %41 Output +%130 = OpVariable %43 Output +%131 = OpVariable %45 Output +%132 = OpVariable %47 Output +%133 = OpVariable %49 Output +%134 = OpVariable %51 Output +%135 = OpVariable %53 Output +%138 = 
OpConstantNull %12 +%175 = OpVariable %15 Input +%177 = OpVariable %18 Input +%179 = OpVariable %21 Input +%181 = OpVariable %24 Input +%183 = OpVariable %27 Input +%185 = OpVariable %30 Input +%187 = OpVariable %33 Input +%189 = OpVariable %36 Input +%191 = OpVariable %39 Output +%192 = OpVariable %41 Output +%193 = OpVariable %43 Output +%194 = OpVariable %45 Output +%195 = OpVariable %47 Output +%196 = OpVariable %49 Output +%197 = OpVariable %51 Output +%198 = OpVariable %53 Output +%201 = OpConstantNull %12 +%203 = OpConstantNull %12 +%248 = OpVariable %15 Input +%250 = OpVariable %18 Input +%252 = OpVariable %21 Input +%254 = OpVariable %24 Input +%256 = OpVariable %27 Input +%258 = OpVariable %30 Input +%260 = OpVariable %33 Input +%262 = OpVariable %36 Input +%264 = OpVariable %39 Output +%266 = OpConstant %4 0 +%268 = OpConstantNull %12 +%54 = OpFunction %2 None %55 +%13 = OpLabel +%64 = OpVariable %65 Function %66 +%16 = OpLoad %4 %14 +%19 = OpLoad %5 %17 +%22 = OpLoad %6 %20 +%25 = OpLoad %7 %23 +%28 = OpLoad %8 %26 +%31 = OpLoad %9 %29 +%34 = OpLoad %10 %32 +%37 = OpLoad %11 %35 +OpBranch %67 +%67 = OpLabel +OpLine %3 15 5 +OpLine %3 15 25 +%69 = OpFAdd %4 %16 %56 +OpLine %3 15 5 +%72 = OpAccessChain %68 %64 %70 +OpStore %72 %69 +OpLine %3 16 5 +OpLine %3 16 25 +%74 = OpFAdd %5 %19 %57 +OpLine %3 16 5 +%76 = OpAccessChain %73 %64 %75 +OpStore %76 %74 +OpLine %3 17 5 +OpLine %3 17 23 +%78 = OpFAdd %6 %22 %58 +OpLine %3 17 5 +%80 = OpAccessChain %77 %64 %79 +OpStore %80 %78 +OpLine %3 18 5 +OpLine %3 18 34 +OpLine %3 18 23 +%82 = OpFAdd %7 %25 %59 +OpLine %3 18 5 +%84 = OpAccessChain %81 %64 %83 +OpStore %84 %82 +OpLine %3 19 5 +OpLine %3 19 23 +%86 = OpFAdd %8 %28 %60 +OpLine %3 19 5 +%88 = OpAccessChain %85 %64 %87 +OpStore %88 %86 +OpLine %3 20 5 +OpLine %3 20 34 +OpLine %3 20 23 +%90 = OpFAdd %9 %31 %61 +OpLine %3 20 5 +%92 = OpAccessChain %89 %64 %91 +OpStore %92 %90 +OpLine %3 21 5 +OpLine %3 21 23 +%94 = OpFAdd %10 %34 %62 +OpLine %3 21 5 +%96 = 
OpAccessChain %93 %64 %95 +OpStore %96 %94 +OpLine %3 22 5 +OpLine %3 22 34 +OpLine %3 22 23 +%98 = OpFAdd %11 %37 %63 +OpLine %3 22 5 +%100 = OpAccessChain %97 %64 %99 +OpStore %100 %98 +OpLine %3 1 1 +%101 = OpLoad %12 %64 +%102 = OpCompositeExtract %4 %101 0 +OpStore %38 %102 +%103 = OpCompositeExtract %5 %101 1 +OpStore %40 %103 +%104 = OpCompositeExtract %6 %101 2 +OpStore %42 %104 +%105 = OpCompositeExtract %7 %101 3 +OpStore %44 %105 +%106 = OpCompositeExtract %8 %101 4 +OpStore %46 %106 +%107 = OpCompositeExtract %9 %101 5 +OpStore %48 %107 +%108 = OpCompositeExtract %10 %101 6 +OpStore %50 %108 +%109 = OpCompositeExtract %11 %101 7 +OpStore %52 %109 +OpReturn +OpFunctionEnd +%136 = OpFunction %2 None %55 +%110 = OpLabel +%137 = OpVariable %65 Function %138 +%113 = OpLoad %4 %112 +%115 = OpLoad %5 %114 +%117 = OpLoad %6 %116 +%119 = OpLoad %7 %118 +%121 = OpLoad %8 %120 +%123 = OpLoad %9 %122 +%125 = OpLoad %10 %124 +%127 = OpLoad %11 %126 +%111 = OpCompositeConstruct %12 %113 %115 %117 %119 %121 %123 %125 %127 +OpBranch %139 +%139 = OpLabel +OpLine %3 40 5 +%140 = OpCompositeExtract %4 %111 0 +OpLine %3 40 25 +%141 = OpFAdd %4 %140 %56 +OpLine %3 40 5 +%142 = OpAccessChain %68 %137 %70 +OpStore %142 %141 +OpLine %3 41 5 +%143 = OpCompositeExtract %5 %111 1 +OpLine %3 41 25 +%144 = OpFAdd %5 %143 %57 +OpLine %3 41 5 +%145 = OpAccessChain %73 %137 %75 +OpStore %145 %144 +OpLine %3 42 5 +%146 = OpCompositeExtract %6 %111 2 +OpLine %3 42 23 +%147 = OpFAdd %6 %146 %58 +OpLine %3 42 5 +%148 = OpAccessChain %77 %137 %79 +OpStore %148 %147 +OpLine %3 43 5 +%149 = OpCompositeExtract %7 %111 3 +OpLine %3 43 40 +OpLine %3 43 23 +%150 = OpFAdd %7 %149 %59 +OpLine %3 43 5 +%151 = OpAccessChain %81 %137 %83 +OpStore %151 %150 +OpLine %3 44 5 +%152 = OpCompositeExtract %8 %111 4 +OpLine %3 44 23 +%153 = OpFAdd %8 %152 %60 +OpLine %3 44 5 +%154 = OpAccessChain %85 %137 %87 +OpStore %154 %153 +OpLine %3 45 5 +%155 = OpCompositeExtract %9 %111 5 +OpLine %3 45 40 +OpLine %3 
45 23 +%156 = OpFAdd %9 %155 %61 +OpLine %3 45 5 +%157 = OpAccessChain %89 %137 %91 +OpStore %157 %156 +OpLine %3 46 5 +%158 = OpCompositeExtract %10 %111 6 +OpLine %3 46 23 +%159 = OpFAdd %10 %158 %62 +OpLine %3 46 5 +%160 = OpAccessChain %93 %137 %95 +OpStore %160 %159 +OpLine %3 47 5 +%161 = OpCompositeExtract %11 %111 7 +OpLine %3 47 40 +OpLine %3 47 23 +%162 = OpFAdd %11 %161 %63 +OpLine %3 47 5 +%163 = OpAccessChain %97 %137 %99 +OpStore %163 %162 +OpLine %3 1 1 +%164 = OpLoad %12 %137 +%165 = OpCompositeExtract %4 %164 0 +OpStore %128 %165 +%166 = OpCompositeExtract %5 %164 1 +OpStore %129 %166 +%167 = OpCompositeExtract %6 %164 2 +OpStore %130 %167 +%168 = OpCompositeExtract %7 %164 3 +OpStore %131 %168 +%169 = OpCompositeExtract %8 %164 4 +OpStore %132 %169 +%170 = OpCompositeExtract %9 %164 5 +OpStore %133 %170 +%171 = OpCompositeExtract %10 %164 6 +OpStore %134 %171 +%172 = OpCompositeExtract %11 %164 7 +OpStore %135 %172 +OpReturn +OpFunctionEnd +%199 = OpFunction %2 None %55 +%173 = OpLabel +%200 = OpVariable %65 Function %201 +%202 = OpVariable %65 Function %203 +%176 = OpLoad %4 %175 +%178 = OpLoad %5 %177 +%180 = OpLoad %6 %179 +%182 = OpLoad %7 %181 +%184 = OpLoad %8 %183 +%186 = OpLoad %9 %185 +%188 = OpLoad %10 %187 +%190 = OpLoad %11 %189 +%174 = OpCompositeConstruct %12 %176 %178 %180 %182 %184 %186 %188 %190 +OpBranch %204 +%204 = OpLabel +OpLine %3 53 5 +OpStore %200 %174 +OpLine %3 55 5 +%205 = OpAccessChain %68 %200 %70 +%206 = OpLoad %4 %205 +OpLine %3 55 25 +%207 = OpFAdd %4 %206 %56 +OpLine %3 55 5 +%208 = OpAccessChain %68 %202 %70 +OpStore %208 %207 +OpLine %3 56 5 +%209 = OpAccessChain %73 %200 %75 +%210 = OpLoad %5 %209 +OpLine %3 56 25 +%211 = OpFAdd %5 %210 %57 +OpLine %3 56 5 +%212 = OpAccessChain %73 %202 %75 +OpStore %212 %211 +OpLine %3 57 5 +%213 = OpAccessChain %77 %200 %79 +%214 = OpLoad %6 %213 +OpLine %3 57 23 +%215 = OpFAdd %6 %214 %58 +OpLine %3 57 5 +%216 = OpAccessChain %77 %202 %79 +OpStore %216 %215 +OpLine %3 58 5 
+%217 = OpAccessChain %81 %200 %83 +%218 = OpLoad %7 %217 +OpLine %3 58 40 +OpLine %3 58 23 +%219 = OpFAdd %7 %218 %59 +OpLine %3 58 5 +%220 = OpAccessChain %81 %202 %83 +OpStore %220 %219 +OpLine %3 59 5 +%221 = OpAccessChain %85 %200 %87 +%222 = OpLoad %8 %221 +OpLine %3 59 23 +%223 = OpFAdd %8 %222 %60 +OpLine %3 59 5 +%224 = OpAccessChain %85 %202 %87 +OpStore %224 %223 +OpLine %3 60 5 +%225 = OpAccessChain %89 %200 %91 +%226 = OpLoad %9 %225 +OpLine %3 60 40 +OpLine %3 60 23 +%227 = OpFAdd %9 %226 %61 +OpLine %3 60 5 +%228 = OpAccessChain %89 %202 %91 +OpStore %228 %227 +OpLine %3 61 5 +%229 = OpAccessChain %93 %200 %95 +%230 = OpLoad %10 %229 +OpLine %3 61 23 +%231 = OpFAdd %10 %230 %62 +OpLine %3 61 5 +%232 = OpAccessChain %93 %202 %95 +OpStore %232 %231 +OpLine %3 62 5 +%233 = OpAccessChain %97 %200 %99 +%234 = OpLoad %11 %233 +OpLine %3 62 40 +OpLine %3 62 23 +%235 = OpFAdd %11 %234 %63 +OpLine %3 62 5 +%236 = OpAccessChain %97 %202 %99 +OpStore %236 %235 +OpLine %3 1 1 +%237 = OpLoad %12 %202 +%238 = OpCompositeExtract %4 %237 0 +OpStore %191 %238 +%239 = OpCompositeExtract %5 %237 1 +OpStore %192 %239 +%240 = OpCompositeExtract %6 %237 2 +OpStore %193 %240 +%241 = OpCompositeExtract %7 %237 3 +OpStore %194 %241 +%242 = OpCompositeExtract %8 %237 4 +OpStore %195 %242 +%243 = OpCompositeExtract %9 %237 5 +OpStore %196 %243 +%244 = OpCompositeExtract %10 %237 6 +OpStore %197 %244 +%245 = OpCompositeExtract %11 %237 7 +OpStore %198 %245 +OpReturn +OpFunctionEnd +%265 = OpFunction %2 None %55 +%246 = OpLabel +%267 = OpVariable %65 Function %268 +%249 = OpLoad %4 %248 +%251 = OpLoad %5 %250 +%253 = OpLoad %6 %252 +%255 = OpLoad %7 %254 +%257 = OpLoad %8 %256 +%259 = OpLoad %9 %258 +%261 = OpLoad %10 %260 +%263 = OpLoad %11 %262 +%247 = OpCompositeConstruct %12 %249 %251 %253 %255 %257 %259 %261 %263 +OpBranch %269 +%269 = OpLabel +OpLine %3 68 5 +OpStore %267 %247 +OpLine %3 69 5 +OpLine %3 69 5 +%270 = OpAccessChain %68 %267 %70 +OpStore %270 %266 +OpLine %3 
70 12 +%271 = OpAccessChain %68 %267 %70 +%272 = OpLoad %4 %271 +OpStore %264 %272 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm new file mode 100644 index 00000000000..8b6b111750f --- /dev/null +++ b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm @@ -0,0 +1,675 @@ +; SPIR-V +; Version: 1.1 +; Generator: rspirv +; Bound: 294 +OpCapability Shader +OpCapability Float16 +OpCapability StorageBuffer16BitAccess +OpCapability UniformAndStorageBuffer16BitAccess +OpExtension "SPV_KHR_16bit_storage" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40 %41 %43 %44 %46 %47 %49 +OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139 +OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210 +OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284 +OpExecutionMode %50 OriginUpperLeft +OpExecutionMode %140 OriginUpperLeft +OpExecutionMode %211 OriginUpperLeft +OpExecutionMode %285 OriginUpperLeft +%3 = OpString "f16-polyfill.wgsl" +OpSource Unknown 0 %3 "enable f16; + +@fragment +fn test_direct( + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +) -> F16IO { + var output: F16IO; + output.scalar_f16 = scalar_f16 + 1.0h; + output.scalar_f32 = scalar_f32 + 1.0; + output.vec2_f16 = vec2_f16 + vec2(1.0h); + output.vec2_f32 = vec2_f32 + vec2(1.0); + output.vec3_f16 = vec3_f16 + vec3(1.0h); + output.vec3_f32 = vec3_f32 + vec3(1.0); + output.vec4_f16 = vec4_f16 + vec4(1.0h); + output.vec4_f32 = vec4_f32 + vec4(1.0); + return 
output; +} + +struct F16IO { + @location(0) scalar_f16: f16, + @location(1) scalar_f32: f32, + @location(2) vec2_f16: vec2, + @location(3) vec2_f32: vec2, + @location(4) vec3_f16: vec3, + @location(5) vec3_f32: vec3, + @location(6) vec4_f16: vec4, + @location(7) vec4_f32: vec4, +} + +@fragment +fn test_struct(input: F16IO) -> F16IO { + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_copy_input(input_original: F16IO) -> F16IO { + var input = input_original; + var output: F16IO; + output.scalar_f16 = input.scalar_f16 + 1.0h; + output.scalar_f32 = input.scalar_f32 + 1.0; + output.vec2_f16 = input.vec2_f16 + vec2(1.0h); + output.vec2_f32 = input.vec2_f32 + vec2(1.0); + output.vec3_f16 = input.vec3_f16 + vec3(1.0h); + output.vec3_f32 = input.vec3_f32 + vec3(1.0); + output.vec4_f16 = input.vec4_f16 + vec4(1.0h); + output.vec4_f32 = input.vec4_f32 + vec4(1.0); + return output; +} + +@fragment +fn test_return_partial(input_original: F16IO) -> @location(0) f16 { + var input = input_original; + input.scalar_f16 = 0.0h; + return input.scalar_f16; +} +" +OpMemberName %12 0 "scalar_f16" +OpMemberName %12 1 "scalar_f32" +OpMemberName %12 2 "vec2_f16" +OpMemberName %12 3 "vec2_f32" +OpMemberName %12 4 "vec3_f16" +OpMemberName %12 5 "vec3_f32" +OpMemberName %12 6 "vec4_f16" +OpMemberName %12 7 "vec4_f32" +OpName %12 "F16IO" +OpName %14 "scalar_f16" +OpName %18 "scalar_f32" +OpName %20 "vec2_f16" +OpName %24 "vec2_f32" +OpName %26 "vec3_f16" +OpName %30 "vec3_f32" +OpName %32 "vec4_f16" +OpName %36 "vec4_f32" +OpName %38 "scalar_f16" +OpName %40 "scalar_f32" +OpName %41 "vec2_f16" +OpName %43 
"vec2_f32" +OpName %44 "vec3_f16" +OpName %46 "vec3_f32" +OpName %47 "vec4_f16" +OpName %49 "vec4_f32" +OpName %50 "test_direct" +OpName %60 "output" +OpName %112 "scalar_f16" +OpName %115 "scalar_f32" +OpName %117 "vec2_f16" +OpName %120 "vec2_f32" +OpName %122 "vec3_f16" +OpName %125 "vec3_f32" +OpName %127 "vec4_f16" +OpName %130 "vec4_f32" +OpName %132 "scalar_f16" +OpName %133 "scalar_f32" +OpName %134 "vec2_f16" +OpName %135 "vec2_f32" +OpName %136 "vec3_f16" +OpName %137 "vec3_f32" +OpName %138 "vec4_f16" +OpName %139 "vec4_f32" +OpName %140 "test_struct" +OpName %141 "output" +OpName %183 "scalar_f16" +OpName %186 "scalar_f32" +OpName %188 "vec2_f16" +OpName %191 "vec2_f32" +OpName %193 "vec3_f16" +OpName %196 "vec3_f32" +OpName %198 "vec4_f16" +OpName %201 "vec4_f32" +OpName %203 "scalar_f16" +OpName %204 "scalar_f32" +OpName %205 "vec2_f16" +OpName %206 "vec2_f32" +OpName %207 "vec3_f16" +OpName %208 "vec3_f32" +OpName %209 "vec4_f16" +OpName %210 "vec4_f32" +OpName %211 "test_copy_input" +OpName %212 "input" +OpName %214 "output" +OpName %264 "scalar_f16" +OpName %267 "scalar_f32" +OpName %269 "vec2_f16" +OpName %272 "vec2_f32" +OpName %274 "vec3_f16" +OpName %277 "vec3_f32" +OpName %279 "vec4_f16" +OpName %282 "vec4_f32" +OpName %285 "test_return_partial" +OpName %287 "input" +OpMemberDecorate %12 0 Offset 0 +OpMemberDecorate %12 1 Offset 4 +OpMemberDecorate %12 2 Offset 8 +OpMemberDecorate %12 3 Offset 16 +OpMemberDecorate %12 4 Offset 24 +OpMemberDecorate %12 5 Offset 32 +OpMemberDecorate %12 6 Offset 48 +OpMemberDecorate %12 7 Offset 64 +OpDecorate %14 Location 0 +OpDecorate %18 Location 1 +OpDecorate %20 Location 2 +OpDecorate %24 Location 3 +OpDecorate %26 Location 4 +OpDecorate %30 Location 5 +OpDecorate %32 Location 6 +OpDecorate %36 Location 7 +OpDecorate %38 Location 0 +OpDecorate %40 Location 1 +OpDecorate %41 Location 2 +OpDecorate %43 Location 3 +OpDecorate %44 Location 4 +OpDecorate %46 Location 5 +OpDecorate %47 Location 6 +OpDecorate %49 
Location 7 +OpDecorate %112 Location 0 +OpDecorate %115 Location 1 +OpDecorate %117 Location 2 +OpDecorate %120 Location 3 +OpDecorate %122 Location 4 +OpDecorate %125 Location 5 +OpDecorate %127 Location 6 +OpDecorate %130 Location 7 +OpDecorate %132 Location 0 +OpDecorate %133 Location 1 +OpDecorate %134 Location 2 +OpDecorate %135 Location 3 +OpDecorate %136 Location 4 +OpDecorate %137 Location 5 +OpDecorate %138 Location 6 +OpDecorate %139 Location 7 +OpDecorate %183 Location 0 +OpDecorate %186 Location 1 +OpDecorate %188 Location 2 +OpDecorate %191 Location 3 +OpDecorate %193 Location 4 +OpDecorate %196 Location 5 +OpDecorate %198 Location 6 +OpDecorate %201 Location 7 +OpDecorate %203 Location 0 +OpDecorate %204 Location 1 +OpDecorate %205 Location 2 +OpDecorate %206 Location 3 +OpDecorate %207 Location 4 +OpDecorate %208 Location 5 +OpDecorate %209 Location 6 +OpDecorate %210 Location 7 +OpDecorate %264 Location 0 +OpDecorate %267 Location 1 +OpDecorate %269 Location 2 +OpDecorate %272 Location 3 +OpDecorate %274 Location 4 +OpDecorate %277 Location 5 +OpDecorate %279 Location 6 +OpDecorate %282 Location 7 +OpDecorate %284 Location 0 +%2 = OpTypeVoid +%4 = OpTypeFloat 16 +%5 = OpTypeFloat 32 +%6 = OpTypeVector %4 2 +%7 = OpTypeVector %5 2 +%8 = OpTypeVector %4 3 +%9 = OpTypeVector %5 3 +%10 = OpTypeVector %4 4 +%11 = OpTypeVector %5 4 +%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11 +%15 = OpTypePointer Input %5 +%14 = OpVariable %15 Input +%18 = OpVariable %15 Input +%21 = OpTypePointer Input %7 +%20 = OpVariable %21 Input +%24 = OpVariable %21 Input +%27 = OpTypePointer Input %9 +%26 = OpVariable %27 Input +%30 = OpVariable %27 Input +%33 = OpTypePointer Input %11 +%32 = OpVariable %33 Input +%36 = OpVariable %33 Input +%39 = OpTypePointer Output %5 +%38 = OpVariable %39 Output +%40 = OpVariable %39 Output +%42 = OpTypePointer Output %7 +%41 = OpVariable %42 Output +%43 = OpVariable %42 Output +%45 = OpTypePointer Output %9 +%44 = OpVariable %45 Output +%46 = 
OpVariable %45 Output +%48 = OpTypePointer Output %11 +%47 = OpVariable %48 Output +%49 = OpVariable %48 Output +%51 = OpTypeFunction %2 +%52 = OpConstant %4 0.000000000000000000000000000000000000000021524 +%53 = OpConstant %5 1 +%54 = OpConstantComposite %6 %52 %52 +%55 = OpConstantComposite %7 %53 %53 +%56 = OpConstantComposite %8 %52 %52 %52 +%57 = OpConstantComposite %9 %53 %53 %53 +%58 = OpConstantComposite %10 %52 %52 %52 %52 +%59 = OpConstantComposite %11 %53 %53 %53 %53 +%61 = OpTypePointer Function %12 +%62 = OpConstantNull %12 +%64 = OpTypePointer Function %4 +%67 = OpTypeInt 32 0 +%66 = OpConstant %67 0 +%69 = OpTypePointer Function %5 +%71 = OpConstant %67 1 +%73 = OpTypePointer Function %6 +%75 = OpConstant %67 2 +%77 = OpTypePointer Function %7 +%79 = OpConstant %67 3 +%81 = OpTypePointer Function %8 +%83 = OpConstant %67 4 +%85 = OpTypePointer Function %9 +%87 = OpConstant %67 5 +%89 = OpTypePointer Function %10 +%91 = OpConstant %67 6 +%93 = OpTypePointer Function %11 +%95 = OpConstant %67 7 +%112 = OpVariable %15 Input +%115 = OpVariable %15 Input +%117 = OpVariable %21 Input +%120 = OpVariable %21 Input +%122 = OpVariable %27 Input +%125 = OpVariable %27 Input +%127 = OpVariable %33 Input +%130 = OpVariable %33 Input +%132 = OpVariable %39 Output +%133 = OpVariable %39 Output +%134 = OpVariable %42 Output +%135 = OpVariable %42 Output +%136 = OpVariable %45 Output +%137 = OpVariable %45 Output +%138 = OpVariable %48 Output +%139 = OpVariable %48 Output +%142 = OpConstantNull %12 +%183 = OpVariable %15 Input +%186 = OpVariable %15 Input +%188 = OpVariable %21 Input +%191 = OpVariable %21 Input +%193 = OpVariable %27 Input +%196 = OpVariable %27 Input +%198 = OpVariable %33 Input +%201 = OpVariable %33 Input +%203 = OpVariable %39 Output +%204 = OpVariable %39 Output +%205 = OpVariable %42 Output +%206 = OpVariable %42 Output +%207 = OpVariable %45 Output +%208 = OpVariable %45 Output +%209 = OpVariable %48 Output +%210 = OpVariable %48 Output +%213 
= OpConstantNull %12 +%215 = OpConstantNull %12 +%264 = OpVariable %15 Input +%267 = OpVariable %15 Input +%269 = OpVariable %21 Input +%272 = OpVariable %21 Input +%274 = OpVariable %27 Input +%277 = OpVariable %27 Input +%279 = OpVariable %33 Input +%282 = OpVariable %33 Input +%284 = OpVariable %39 Output +%286 = OpConstant %4 0 +%288 = OpConstantNull %12 +%50 = OpFunction %2 None %51 +%13 = OpLabel +%60 = OpVariable %61 Function %62 +%16 = OpLoad %5 %14 +%17 = OpFConvert %4 %16 +%19 = OpLoad %5 %18 +%22 = OpLoad %7 %20 +%23 = OpFConvert %6 %22 +%25 = OpLoad %7 %24 +%28 = OpLoad %9 %26 +%29 = OpFConvert %8 %28 +%31 = OpLoad %9 %30 +%34 = OpLoad %11 %32 +%35 = OpFConvert %10 %34 +%37 = OpLoad %11 %36 +OpBranch %63 +%63 = OpLabel +OpLine %3 15 5 +OpLine %3 15 25 +%65 = OpFAdd %4 %17 %52 +OpLine %3 15 5 +%68 = OpAccessChain %64 %60 %66 +OpStore %68 %65 +OpLine %3 16 5 +OpLine %3 16 25 +%70 = OpFAdd %5 %19 %53 +OpLine %3 16 5 +%72 = OpAccessChain %69 %60 %71 +OpStore %72 %70 +OpLine %3 17 5 +OpLine %3 17 23 +%74 = OpFAdd %6 %23 %54 +OpLine %3 17 5 +%76 = OpAccessChain %73 %60 %75 +OpStore %76 %74 +OpLine %3 18 5 +OpLine %3 18 34 +OpLine %3 18 23 +%78 = OpFAdd %7 %25 %55 +OpLine %3 18 5 +%80 = OpAccessChain %77 %60 %79 +OpStore %80 %78 +OpLine %3 19 5 +OpLine %3 19 23 +%82 = OpFAdd %8 %29 %56 +OpLine %3 19 5 +%84 = OpAccessChain %81 %60 %83 +OpStore %84 %82 +OpLine %3 20 5 +OpLine %3 20 34 +OpLine %3 20 23 +%86 = OpFAdd %9 %31 %57 +OpLine %3 20 5 +%88 = OpAccessChain %85 %60 %87 +OpStore %88 %86 +OpLine %3 21 5 +OpLine %3 21 23 +%90 = OpFAdd %10 %35 %58 +OpLine %3 21 5 +%92 = OpAccessChain %89 %60 %91 +OpStore %92 %90 +OpLine %3 22 5 +OpLine %3 22 34 +OpLine %3 22 23 +%94 = OpFAdd %11 %37 %59 +OpLine %3 22 5 +%96 = OpAccessChain %93 %60 %95 +OpStore %96 %94 +OpLine %3 1 1 +%97 = OpLoad %12 %60 +%98 = OpCompositeExtract %4 %97 0 +%99 = OpFConvert %5 %98 +OpStore %38 %99 +%100 = OpCompositeExtract %5 %97 1 +OpStore %40 %100 +%101 = OpCompositeExtract %6 %97 2 +%102 = 
OpFConvert %7 %101 +OpStore %41 %102 +%103 = OpCompositeExtract %7 %97 3 +OpStore %43 %103 +%104 = OpCompositeExtract %8 %97 4 +%105 = OpFConvert %9 %104 +OpStore %44 %105 +%106 = OpCompositeExtract %9 %97 5 +OpStore %46 %106 +%107 = OpCompositeExtract %10 %97 6 +%108 = OpFConvert %11 %107 +OpStore %47 %108 +%109 = OpCompositeExtract %11 %97 7 +OpStore %49 %109 +OpReturn +OpFunctionEnd +%140 = OpFunction %2 None %51 +%110 = OpLabel +%141 = OpVariable %61 Function %142 +%113 = OpLoad %5 %112 +%114 = OpFConvert %4 %113 +%116 = OpLoad %5 %115 +%118 = OpLoad %7 %117 +%119 = OpFConvert %6 %118 +%121 = OpLoad %7 %120 +%123 = OpLoad %9 %122 +%124 = OpFConvert %8 %123 +%126 = OpLoad %9 %125 +%128 = OpLoad %11 %127 +%129 = OpFConvert %10 %128 +%131 = OpLoad %11 %130 +%111 = OpCompositeConstruct %12 %114 %116 %119 %121 %124 %126 %129 %131 +OpBranch %143 +%143 = OpLabel +OpLine %3 40 5 +%144 = OpCompositeExtract %4 %111 0 +OpLine %3 40 25 +%145 = OpFAdd %4 %144 %52 +OpLine %3 40 5 +%146 = OpAccessChain %64 %141 %66 +OpStore %146 %145 +OpLine %3 41 5 +%147 = OpCompositeExtract %5 %111 1 +OpLine %3 41 25 +%148 = OpFAdd %5 %147 %53 +OpLine %3 41 5 +%149 = OpAccessChain %69 %141 %71 +OpStore %149 %148 +OpLine %3 42 5 +%150 = OpCompositeExtract %6 %111 2 +OpLine %3 42 23 +%151 = OpFAdd %6 %150 %54 +OpLine %3 42 5 +%152 = OpAccessChain %73 %141 %75 +OpStore %152 %151 +OpLine %3 43 5 +%153 = OpCompositeExtract %7 %111 3 +OpLine %3 43 40 +OpLine %3 43 23 +%154 = OpFAdd %7 %153 %55 +OpLine %3 43 5 +%155 = OpAccessChain %77 %141 %79 +OpStore %155 %154 +OpLine %3 44 5 +%156 = OpCompositeExtract %8 %111 4 +OpLine %3 44 23 +%157 = OpFAdd %8 %156 %56 +OpLine %3 44 5 +%158 = OpAccessChain %81 %141 %83 +OpStore %158 %157 +OpLine %3 45 5 +%159 = OpCompositeExtract %9 %111 5 +OpLine %3 45 40 +OpLine %3 45 23 +%160 = OpFAdd %9 %159 %57 +OpLine %3 45 5 +%161 = OpAccessChain %85 %141 %87 +OpStore %161 %160 +OpLine %3 46 5 +%162 = OpCompositeExtract %10 %111 6 +OpLine %3 46 23 +%163 = OpFAdd %10 
%162 %58 +OpLine %3 46 5 +%164 = OpAccessChain %89 %141 %91 +OpStore %164 %163 +OpLine %3 47 5 +%165 = OpCompositeExtract %11 %111 7 +OpLine %3 47 40 +OpLine %3 47 23 +%166 = OpFAdd %11 %165 %59 +OpLine %3 47 5 +%167 = OpAccessChain %93 %141 %95 +OpStore %167 %166 +OpLine %3 1 1 +%168 = OpLoad %12 %141 +%169 = OpCompositeExtract %4 %168 0 +%170 = OpFConvert %5 %169 +OpStore %132 %170 +%171 = OpCompositeExtract %5 %168 1 +OpStore %133 %171 +%172 = OpCompositeExtract %6 %168 2 +%173 = OpFConvert %7 %172 +OpStore %134 %173 +%174 = OpCompositeExtract %7 %168 3 +OpStore %135 %174 +%175 = OpCompositeExtract %8 %168 4 +%176 = OpFConvert %9 %175 +OpStore %136 %176 +%177 = OpCompositeExtract %9 %168 5 +OpStore %137 %177 +%178 = OpCompositeExtract %10 %168 6 +%179 = OpFConvert %11 %178 +OpStore %138 %179 +%180 = OpCompositeExtract %11 %168 7 +OpStore %139 %180 +OpReturn +OpFunctionEnd +%211 = OpFunction %2 None %51 +%181 = OpLabel +%212 = OpVariable %61 Function %213 +%214 = OpVariable %61 Function %215 +%184 = OpLoad %5 %183 +%185 = OpFConvert %4 %184 +%187 = OpLoad %5 %186 +%189 = OpLoad %7 %188 +%190 = OpFConvert %6 %189 +%192 = OpLoad %7 %191 +%194 = OpLoad %9 %193 +%195 = OpFConvert %8 %194 +%197 = OpLoad %9 %196 +%199 = OpLoad %11 %198 +%200 = OpFConvert %10 %199 +%202 = OpLoad %11 %201 +%182 = OpCompositeConstruct %12 %185 %187 %190 %192 %195 %197 %200 %202 +OpBranch %216 +%216 = OpLabel +OpLine %3 53 5 +OpStore %212 %182 +OpLine %3 55 5 +%217 = OpAccessChain %64 %212 %66 +%218 = OpLoad %4 %217 +OpLine %3 55 25 +%219 = OpFAdd %4 %218 %52 +OpLine %3 55 5 +%220 = OpAccessChain %64 %214 %66 +OpStore %220 %219 +OpLine %3 56 5 +%221 = OpAccessChain %69 %212 %71 +%222 = OpLoad %5 %221 +OpLine %3 56 25 +%223 = OpFAdd %5 %222 %53 +OpLine %3 56 5 +%224 = OpAccessChain %69 %214 %71 +OpStore %224 %223 +OpLine %3 57 5 +%225 = OpAccessChain %73 %212 %75 +%226 = OpLoad %6 %225 +OpLine %3 57 23 +%227 = OpFAdd %6 %226 %54 +OpLine %3 57 5 +%228 = OpAccessChain %73 %214 %75 +OpStore 
%228 %227 +OpLine %3 58 5 +%229 = OpAccessChain %77 %212 %79 +%230 = OpLoad %7 %229 +OpLine %3 58 40 +OpLine %3 58 23 +%231 = OpFAdd %7 %230 %55 +OpLine %3 58 5 +%232 = OpAccessChain %77 %214 %79 +OpStore %232 %231 +OpLine %3 59 5 +%233 = OpAccessChain %81 %212 %83 +%234 = OpLoad %8 %233 +OpLine %3 59 23 +%235 = OpFAdd %8 %234 %56 +OpLine %3 59 5 +%236 = OpAccessChain %81 %214 %83 +OpStore %236 %235 +OpLine %3 60 5 +%237 = OpAccessChain %85 %212 %87 +%238 = OpLoad %9 %237 +OpLine %3 60 40 +OpLine %3 60 23 +%239 = OpFAdd %9 %238 %57 +OpLine %3 60 5 +%240 = OpAccessChain %85 %214 %87 +OpStore %240 %239 +OpLine %3 61 5 +%241 = OpAccessChain %89 %212 %91 +%242 = OpLoad %10 %241 +OpLine %3 61 23 +%243 = OpFAdd %10 %242 %58 +OpLine %3 61 5 +%244 = OpAccessChain %89 %214 %91 +OpStore %244 %243 +OpLine %3 62 5 +%245 = OpAccessChain %93 %212 %95 +%246 = OpLoad %11 %245 +OpLine %3 62 40 +OpLine %3 62 23 +%247 = OpFAdd %11 %246 %59 +OpLine %3 62 5 +%248 = OpAccessChain %93 %214 %95 +OpStore %248 %247 +OpLine %3 1 1 +%249 = OpLoad %12 %214 +%250 = OpCompositeExtract %4 %249 0 +%251 = OpFConvert %5 %250 +OpStore %203 %251 +%252 = OpCompositeExtract %5 %249 1 +OpStore %204 %252 +%253 = OpCompositeExtract %6 %249 2 +%254 = OpFConvert %7 %253 +OpStore %205 %254 +%255 = OpCompositeExtract %7 %249 3 +OpStore %206 %255 +%256 = OpCompositeExtract %8 %249 4 +%257 = OpFConvert %9 %256 +OpStore %207 %257 +%258 = OpCompositeExtract %9 %249 5 +OpStore %208 %258 +%259 = OpCompositeExtract %10 %249 6 +%260 = OpFConvert %11 %259 +OpStore %209 %260 +%261 = OpCompositeExtract %11 %249 7 +OpStore %210 %261 +OpReturn +OpFunctionEnd +%285 = OpFunction %2 None %51 +%262 = OpLabel +%287 = OpVariable %61 Function %288 +%265 = OpLoad %5 %264 +%266 = OpFConvert %4 %265 +%268 = OpLoad %5 %267 +%270 = OpLoad %7 %269 +%271 = OpFConvert %6 %270 +%273 = OpLoad %7 %272 +%275 = OpLoad %9 %274 +%276 = OpFConvert %8 %275 +%278 = OpLoad %9 %277 +%280 = OpLoad %11 %279 +%281 = OpFConvert %10 %280 +%283 = OpLoad 
%11 %282 +%263 = OpCompositeConstruct %12 %266 %268 %271 %273 %276 %278 %281 %283 +OpBranch %289 +%289 = OpLabel +OpLine %3 68 5 +OpStore %287 %263 +OpLine %3 69 5 +OpLine %3 69 5 +%290 = OpAccessChain %64 %287 %66 +OpStore %290 %286 +OpLine %3 70 12 +%291 = OpAccessChain %64 %287 %66 +%292 = OpLoad %4 %291 +%293 = OpFConvert %5 %292 +OpStore %284 %293 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 67428309541..8c3fa7d00dc 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -226,7 +226,7 @@ impl PhysicalDeviceFeatures { /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions fn from_extensions_and_requested_features( phd_capabilities: &PhysicalDeviceProperties, - _phd_features: &PhysicalDeviceFeatures, + phd_features: &PhysicalDeviceFeatures, enabled_extensions: &[&'static CStr], requested_features: wgt::Features, downlevel_flags: wgt::DownlevelFlags, @@ -396,10 +396,17 @@ impl PhysicalDeviceFeatures { _ => None, }, _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) { + // Check if the device actually supports storage_input_output16 + let storage_input_output16_supported = phd_features + ._16bit_storage + .as_ref() + .map(|features| features.storage_input_output16 != 0) + .unwrap_or(false); + Some( vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) - .storage_input_output16(true) + .storage_input_output16(storage_input_output16_supported) .uniform_and_storage_buffer16_bit_access(true), ) } else { @@ -736,12 +743,12 @@ impl PhysicalDeviceFeatures { if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage) { + // Note storage_input_output16 is not required, we polyfill f16 I/O using f32 types when this capability is not available features.set( F::SHADER_F16, f16_i8.shader_float16 != 0 && bit16.storage_buffer16_bit_access != 0 
- && bit16.uniform_and_storage_buffer16_bit_access != 0 - && bit16.storage_input_output16 != 0, + && bit16.uniform_and_storage_buffer16_bit_access != 0, ); } @@ -2109,6 +2116,15 @@ impl super::Adapter { spv::ZeroInitializeWorkgroupMemoryMode::Polyfill }, force_loop_bounding: true, + use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) && { + // Check if the device actually supports storage_input_output16 + let phd_features = self.physical_device_features(enabled_extensions, features); + phd_features + ._16bit_storage + .as_ref() + .map(|storage_features| storage_features.storage_input_output16 != 0) + .unwrap_or(false) + }, // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None, From e9d75c3663d97e4545007ea083f6f40477d06a35 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Wed, 16 Jul 2025 15:56:15 -0400 Subject: [PATCH 09/20] fixup! [naga spv-out] Add f16 io polyfill --- naga/src/back/spv/f16_polyfill.rs | 11 ++++++----- wgpu-hal/src/vulkan/adapter.rs | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs index abcdb2092f2..e1f0e5bd224 100644 --- a/naga/src/back/spv/f16_polyfill.rs +++ b/naga/src/back/spv/f16_polyfill.rs @@ -1,17 +1,18 @@ /*! -This module provides functionality polyfills f16 input/output variables -when the StorageInputOutput16 capability is not available or disabled. +This module provides functionality polyfills `f16` input/output variables when the +`StorageInputOutput16` capability is not available or disabled. It works by: -1. Declaring f16 I/O variables as f32 in SPIR-V -2. Converting between f16 and f32 at runtime using OpFConvert + +1. Declaring `f16` I/O variables as `f32` in SPIR-V +2. Converting between `f16` and `f32` at runtime using `OpFConvert` 3. 
Maintaining mappings to track which variables need conversion */ use crate::back::spv::{Instruction, LocalType, NumericType, Word}; use alloc::vec::Vec; -/// Manages f16 I/O polyfill state and operations. +/// Manages `f16` I/O polyfill state and operations. #[derive(Default)] pub(super) struct F16IoPolyfill { use_native: bool, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 8c3fa7d00dc..49ecbad564b 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -743,7 +743,8 @@ impl PhysicalDeviceFeatures { if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage) { - // Note storage_input_output16 is not required, we polyfill f16 I/O using f32 types when this capability is not available + // Note `storage_input_output16` is not required, we polyfill `f16` I/O using `f32` + // types when this capability is not available features.set( F::SHADER_F16, f16_i8.shader_float16 != 0 From a8cddd9590b07ee0b04f683d42ae20beea7aa2b9 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Wed, 16 Jul 2025 17:34:48 -0400 Subject: [PATCH 10/20] fixup! [naga spv-out] Add f16 io polyfill --- naga/src/back/spv/f16_polyfill.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs index e1f0e5bd224..9ce6d0fe39f 100644 --- a/naga/src/back/spv/f16_polyfill.rs +++ b/naga/src/back/spv/f16_polyfill.rs @@ -14,7 +14,7 @@ use alloc::vec::Vec; /// Manages `f16` I/O polyfill state and operations. 
#[derive(Default)] -pub(super) struct F16IoPolyfill { +pub(in crate::back::spv) struct F16IoPolyfill { use_native: bool, variable_map: crate::FastHashMap, } From c5e2ad36bb444fb2c28fcb221e8b13c5f9b34a25 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 19:53:12 -0400 Subject: [PATCH 11/20] Fix merge oops --- wgpu-hal/src/vulkan/adapter.rs | 48 +++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 49ecbad564b..2ff2a65ca50 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -815,7 +815,7 @@ impl PhysicalDeviceFeatures { features.set( F::EXPERIMENTAL_RAY_QUERY // Although this doesn't really require ray queries, it does not make sense to be enabled if acceleration structures - // aren't enabled. + // aren't enabled. | F::EXTENDED_ACCELERATION_STRUCTURE_VERTEX_FORMATS, supports_acceleration_structures && caps.supports_extension(khr::ray_query::NAME), ); @@ -940,7 +940,7 @@ pub struct PhysicalDeviceProperties { /// Additional `vk::PhysicalDevice` properties from the /// `VK_EXT_mesh_shader` extension. - _mesh_shader: Option>, + mesh_shader: Option>, /// The device API version. /// @@ -1168,14 +1168,29 @@ impl PhysicalDeviceProperties { let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0] .min(limits.max_compute_work_group_count[1]) .min(limits.max_compute_work_group_count[2]); + let ( + max_task_workgroup_total_count, + max_task_workgroups_per_dimension, + max_mesh_multiview_count, + max_mesh_output_layers, + ) = match self.mesh_shader { + Some(m) => ( + m.max_task_work_group_total_count, + m.max_task_work_group_count.into_iter().min().unwrap(), + m.max_mesh_multiview_view_count, + m.max_mesh_output_layers, + ), + None => (0, 0, 0, 0), + }; - // Prevent very large buffers on mesa and most android devices. 
+ // Prevent very large buffers on mesa and most android devices, and in all cases + // don't risk confusing JS by exceeding the range of a double. let is_nvidia = self.properties.vendor_id == crate::auxil::db::nvidia::VENDOR; let max_buffer_size = if (cfg!(target_os = "linux") || cfg!(target_os = "android")) && !is_nvidia { i32::MAX as u64 } else { - u64::MAX + 1u64 << 52 }; let mut max_binding_array_elements = 0; @@ -1275,6 +1290,12 @@ impl PhysicalDeviceProperties { max_compute_workgroups_per_dimension, max_buffer_size, max_non_sampler_bindings: u32::MAX, + + max_task_workgroup_total_count, + max_task_workgroups_per_dimension, + max_mesh_multiview_count, + max_mesh_output_layers, + max_blas_primitive_count, max_blas_geometry_count, max_tlas_instance_count, @@ -1409,7 +1430,7 @@ impl super::InstanceShared { if supports_mesh_shader { let next = capabilities - ._mesh_shader + .mesh_shader .insert(vk::PhysicalDeviceMeshShaderPropertiesEXT::default()); properties2 = properties2.push_next(next); } @@ -1702,7 +1723,7 @@ impl super::Instance { }; let queue_flags = queue_families.first()?.queue_flags; if !queue_flags.contains(vk::QueueFlags::GRAPHICS) { - log::warn!("The first queue only exposes {:?}", queue_flags); + log::warn!("The first queue only exposes {queue_flags:?}"); return None; } @@ -1830,10 +1851,10 @@ impl super::Adapter { }); if !unsupported_extensions.is_empty() { - log::warn!("Missing extensions: {:?}", unsupported_extensions); + log::warn!("Missing extensions: {unsupported_extensions:?}"); } - log::debug!("Supported extensions: {:?}", supported_extensions); + log::debug!("Supported extensions: {supported_extensions:?}"); supported_extensions } @@ -2179,6 +2200,9 @@ impl super::Adapter { self.private_caps.maximum_samplers, )), memory_allocations_counter: Default::default(), + + texture_identity_factory: super::ResourceIdentityFactory::new(), + texture_view_identity_factory: super::ResourceIdentityFactory::new(), }); let relay_semaphores = 
super::RelaySemaphores::new(&shared)?; @@ -2527,7 +2551,7 @@ impl crate::Adapter for super::Adapter { Ok(true) => (), Ok(false) => return None, Err(e) => { - log::error!("get_physical_device_surface_support: {}", e); + log::error!("get_physical_device_surface_support: {e}"); return None; } } @@ -2542,7 +2566,7 @@ impl crate::Adapter for super::Adapter { } { Ok(caps) => caps, Err(e) => { - log::error!("get_physical_device_surface_capabilities: {}", e); + log::error!("get_physical_device_surface_capabilities: {e}"); return None; } } @@ -2576,7 +2600,7 @@ impl crate::Adapter for super::Adapter { } { Ok(present_modes) => present_modes, Err(e) => { - log::error!("get_physical_device_surface_present_modes: {}", e); + log::error!("get_physical_device_surface_present_modes: {e}"); // Per definition of `SurfaceCapabilities`, there must be at least one present mode. return None; } @@ -2592,7 +2616,7 @@ impl crate::Adapter for super::Adapter { } { Ok(formats) => formats, Err(e) => { - log::error!("get_physical_device_surface_formats: {}", e); + log::error!("get_physical_device_surface_formats: {e}"); // Per definition of `SurfaceCapabilities`, there must be at least one present format. 
return None; } From 0389c1b3e60ecb4f87e6172957dfc9cc4cc9ddde Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 20:14:55 -0400 Subject: [PATCH 12/20] Feedback --- naga/src/back/spv/block.rs | 4 ++-- naga/src/back/spv/f16_polyfill.rs | 9 ++++----- naga/src/back/spv/writer.rs | 12 ++++++------ 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index b0b061626f6..a69790380fa 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -237,7 +237,7 @@ impl Writer { } }; - if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) { + if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(res_member.id) { let converted = self.id_gen.next(); super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( member_value_id, @@ -2325,7 +2325,7 @@ impl BlockContext<'_> { ExpressionPointer::Ready { pointer_id } => { let id = self.gen_id(); - if let Some((f32_ty, _)) = + if let Some(f32_ty) = self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) { block diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs index 9ce6d0fe39f..a982348c83e 100644 --- a/naga/src/back/spv/f16_polyfill.rs +++ b/naga/src/back/spv/f16_polyfill.rs @@ -16,7 +16,7 @@ use alloc::vec::Vec; #[derive(Default)] pub(in crate::back::spv) struct F16IoPolyfill { use_native: bool, - variable_map: crate::FastHashMap, + variable_map: crate::FastHashMap, } impl F16IoPolyfill { @@ -42,12 +42,11 @@ impl F16IoPolyfill { } } - pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) { - self.variable_map - .insert(variable_id, (f32_type_id, f16_type_id)); + pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word) { + self.variable_map.insert(variable_id, f32_type_id); } - pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> { + pub fn get_polyfill_info(&self, variable_id: Word) -> Option { 
self.variable_map.get(&variable_id).copied() } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index e54cffe19cf..769949b82d0 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -734,7 +734,7 @@ impl Writer { iface.varying_ids.push(varying_id); let mut id = self.id_gen.next(); - if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) { + if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) { prelude .body .push(Instruction::load(f32_ty, id, varying_id, None)); @@ -778,8 +778,8 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let id = self.id_gen.next(); - if let Some((f32_ty, _)) = + let mut id = self.id_gen.next(); + if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) { prelude @@ -792,7 +792,8 @@ impl Writer { converted, &mut prelude.body, ); - constituent_ids.push(converted); + id = converted; + constituent_ids.push(id); } else { prelude .body @@ -1960,9 +1961,8 @@ impl Writer { let f32_type_id = self.get_localtype_id(f32_value_local); let ptr_id = self.get_pointer_type_id(f32_type_id, class); - let f16_type_id = self.get_handle_type_id(ty); self.io_f16_polyfills - .register_variable(id, f32_type_id, f16_type_id); + .register_variable(id, f32_type_id); ptr_id } else { From 7642b6425170b87b475acb6a52a37d302e3aa5ae Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 20:18:42 -0400 Subject: [PATCH 13/20] fmt --- naga/src/back/spv/block.rs | 4 +--- naga/src/back/spv/writer.rs | 7 ++----- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index a69790380fa..1b20d998c87 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2325,9 +2325,7 @@ impl BlockContext<'_> { ExpressionPointer::Ready { pointer_id } => { let id = self.gen_id(); - if let Some(f32_ty) = - self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) - { + if let 
Some(f32_ty) = self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) { block .body .push(Instruction::load(f32_ty, id, pointer_id, None)); diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 769949b82d0..64a81b67a31 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -779,9 +779,7 @@ impl Writer { )?; iface.varying_ids.push(varying_id); let mut id = self.id_gen.next(); - if let Some(f32_ty) = - self.io_f16_polyfills.get_polyfill_info(varying_id) - { + if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) { prelude .body .push(Instruction::load(f32_ty, id, varying_id, None)); @@ -1961,8 +1959,7 @@ impl Writer { let f32_type_id = self.get_localtype_id(f32_value_local); let ptr_id = self.get_pointer_type_id(f32_type_id, class); - self.io_f16_polyfills - .register_variable(id, f32_type_id); + self.io_f16_polyfills.register_variable(id, f32_type_id); ptr_id } else { From 8f13d221dae886f3e3f5a8c3a7e77a8db6c01ee3 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 21:15:32 -0400 Subject: [PATCH 14/20] Cleanup --- naga/src/back/spv/block.rs | 31 ++++------ naga/src/back/spv/f16_polyfill.rs | 14 ++--- naga/src/back/spv/writer.rs | 95 +++++++++++++++++-------------- 3 files changed, 69 insertions(+), 71 deletions(-) diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 1b20d998c87..fa6768d3c33 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -237,18 +237,7 @@ impl Writer { } }; - if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(res_member.id) { - let converted = self.id_gen.next(); - super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( - member_value_id, - f32_ty, - converted, - body, - ); - body.push(Instruction::store(res_member.id, converted, None)); - } else { - body.push(Instruction::store(res_member.id, member_value_id, None)); - } + self.store_io_with_f16_polyfill(body, res_member.id, member_value_id); match 
res_member.built_in { Some(crate::BuiltIn::Position { .. }) @@ -2325,16 +2314,16 @@ impl BlockContext<'_> { ExpressionPointer::Ready { pointer_id } => { let id = self.gen_id(); - if let Some(f32_ty) = self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) { - block - .body - .push(Instruction::load(f32_ty, id, pointer_id, None)); - let converted = self.gen_id(); - super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( - id, - result_type_id, - converted, + if self + .writer + .io_f16_polyfills + .get_f32_io_type(pointer_id) + .is_some() + { + let converted = self.writer.load_io_with_f16_polyfill( &mut block.body, + pointer_id, + result_type_id, ); return Ok(converted); } diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs index a982348c83e..ee391f7f245 100644 --- a/naga/src/back/spv/f16_polyfill.rs +++ b/naga/src/back/spv/f16_polyfill.rs @@ -16,14 +16,14 @@ use alloc::vec::Vec; #[derive(Default)] pub(in crate::back::spv) struct F16IoPolyfill { use_native: bool, - variable_map: crate::FastHashMap, + io_var_to_f32_type: crate::FastHashMap, } impl F16IoPolyfill { pub fn new(use_storage_input_output_16: bool) -> Self { Self { use_native: use_storage_input_output_16, - variable_map: crate::FastHashMap::default(), + io_var_to_f32_type: crate::FastHashMap::default(), } } @@ -42,12 +42,12 @@ impl F16IoPolyfill { } } - pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word) { - self.variable_map.insert(variable_id, f32_type_id); + pub fn register_io_var(&mut self, variable_id: Word, f32_type_id: Word) { + self.io_var_to_f32_type.insert(variable_id, f32_type_id); } - pub fn get_polyfill_info(&self, variable_id: Word) -> Option { - self.variable_map.get(&variable_id).copied() + pub fn get_f32_io_type(&self, variable_id: Word) -> Option { + self.io_var_to_f32_type.get(&variable_id).copied() } pub fn emit_f16_to_f32_conversion( @@ -98,7 +98,7 @@ impl F16IoPolyfill { impl crate::back::spv::recyclable::Recyclable for 
F16IoPolyfill { fn recycle(mut self) -> Self { - self.variable_map = self.variable_map.recycle(); + self.io_var_to_f32_type = self.io_var_to_f32_type.recycle(); self } } diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 64a81b67a31..48d13f03c25 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -732,28 +732,11 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let mut id = self.id_gen.next(); - - if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) { - prelude - .body - .push(Instruction::load(f32_ty, id, varying_id, None)); - let converted = self.id_gen.next(); - super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( - id, - argument_type_id, - converted, - &mut prelude.body, - ); - id = converted; - } else { - prelude.body.push(Instruction::load( - argument_type_id, - id, - varying_id, - None, - )); - } + let id = self.load_io_with_f16_polyfill( + &mut prelude.body, + varying_id, + argument_type_id, + ); if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { local_invocation_id = Some(id); @@ -778,26 +761,9 @@ impl Writer { binding, )?; iface.varying_ids.push(varying_id); - let mut id = self.id_gen.next(); - if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) { - prelude - .body - .push(Instruction::load(f32_ty, id, varying_id, None)); - let converted = self.id_gen.next(); - super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( - id, - type_id, - converted, - &mut prelude.body, - ); - id = converted; - constituent_ids.push(id); - } else { - prelude - .body - .push(Instruction::load(type_id, id, varying_id, None)); - constituent_ids.push(id); - } + let id = + self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id); + constituent_ids.push(id); if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) { local_invocation_id = Some(id); @@ -1959,7 +1925,7 @@ impl Writer { let 
f32_type_id = self.get_localtype_id(f32_value_local); let ptr_id = self.get_pointer_type_id(f32_type_id, class); - self.io_f16_polyfills.register_variable(id, f32_type_id); + self.io_f16_polyfills.register_io_var(id, f32_type_id); ptr_id } else { @@ -2172,6 +2138,49 @@ impl Writer { Ok(id) } + /// Load an IO variable, converting from `f32` to `f16` if polyfill is active. + /// Returns the id of the loaded value matching `target_type_id`. + pub(super) fn load_io_with_f16_polyfill( + &mut self, + body: &mut Vec, + varying_id: Word, + target_type_id: Word, + ) -> Word { + let tmp = self.id_gen.next(); + if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) { + body.push(Instruction::load(f32_ty, tmp, varying_id, None)); + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion( + tmp, + target_type_id, + converted, + body, + ); + converted + } else { + body.push(Instruction::load(target_type_id, tmp, varying_id, None)); + tmp + } + } + + /// Store an IO variable, converting from `f16` to `f32` if polyfill is active. 
+ pub(super) fn store_io_with_f16_polyfill( + &mut self, + body: &mut Vec, + varying_id: Word, + value_id: Word, + ) { + if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) { + let converted = self.id_gen.next(); + super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion( + value_id, f32_ty, converted, body, + ); + body.push(Instruction::store(varying_id, converted, None)); + } else { + body.push(Instruction::store(varying_id, value_id, None)); + } + } + fn write_global_variable( &mut self, ir_module: &crate::Module, From 6efd4ba5cc43b4e1dc884e25b253a161d0660ef0 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 21:23:16 -0400 Subject: [PATCH 15/20] Cleanup comments --- naga/src/back/spv/mod.rs | 8 ++++---- naga/tests/naga/spirv_capabilities.rs | 6 +++--- wgpu-hal/src/vulkan/adapter.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs index 53bfd2cd845..ab3abe95515 100644 --- a/naga/src/back/spv/mod.rs +++ b/naga/src/back/spv/mod.rs @@ -774,8 +774,8 @@ pub struct Writer { ray_get_committed_intersection_function: Option, ray_get_candidate_intersection_function: Option, - /// F16 I/O polyfill manager for handling f16 input/output variables - /// when StorageInputOutput16 capability is not available. + /// F16 I/O polyfill manager for handling `f16` input/output variables + /// when `StorageInputOutput16` capability is not available. io_f16_polyfills: f16_polyfill::F16IoPolyfill, } @@ -859,8 +859,8 @@ pub struct Options<'a> { /// to think the number of iterations is bounded. pub force_loop_bounding: bool, - /// Whether to use the StorageInputOutput16 capability for f16 shader I/O. - /// When false, f16 I/O is polyfilled using f32 types with conversions. + /// Whether to use the `StorageInputOutput16` capability for `f16` shader I/O. + /// When false, `f16` I/O is polyfilled using `f32` types with conversions. 
pub use_storage_input_output_16: bool, pub debug_info: Option>, diff --git a/naga/tests/naga/spirv_capabilities.rs b/naga/tests/naga/spirv_capabilities.rs index aa99298273d..6d0e8153b81 100644 --- a/naga/tests/naga/spirv_capabilities.rs +++ b/naga/tests/naga/spirv_capabilities.rs @@ -253,7 +253,7 @@ fn f16_io_capabilities() { .unwrap(); let caps_native = writer_native.get_capabilities_used(); - // Should include StorageInputOutput16 for native f16 I/O + // Should include `StorageInputOutput16` for native `f16` I/O assert!(caps_native.contains(&Ca::StorageInputOutput16)); // Test polyfill path: use_storage_input_output_16 = false @@ -269,10 +269,10 @@ fn f16_io_capabilities() { .unwrap(); let caps_polyfill = writer_polyfill.get_capabilities_used(); - // Should not include StorageInputOutput16 when polyfilled + // Should not include `StorageInputOutput16` when polyfilled assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16)); - // But should still include the basic f16 capabilities + // But should still include the basic `f16` capabilities assert!(caps_polyfill.contains(&Ca::Float16)); } diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 2ff2a65ca50..35d7aafb3f7 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -396,7 +396,7 @@ impl PhysicalDeviceFeatures { _ => None, }, _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) { - // Check if the device actually supports storage_input_output16 + // Check if the device actually supports `storage_input_output16` let storage_input_output16_supported = phd_features ._16bit_storage .as_ref() From 7098938a18b234a2abb5d0e621b61c598f6062d0 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 21:59:26 -0400 Subject: [PATCH 16/20] Cleanup capability check --- wgpu-hal/src/vulkan/adapter.rs | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/wgpu-hal/src/vulkan/adapter.rs 
b/wgpu-hal/src/vulkan/adapter.rs index 35d7aafb3f7..ce840590c36 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -198,6 +198,13 @@ impl PhysicalDeviceFeatures { info } + fn supports_storage_input_output_16(&self) -> bool { + self._16bit_storage + .as_ref() + .map(|features| features.storage_input_output16 != 0) + .unwrap_or(false) + } + /// Create a `PhysicalDeviceFeatures` that can be used to create a logical /// device. /// @@ -396,17 +403,12 @@ impl PhysicalDeviceFeatures { _ => None, }, _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) { - // Check if the device actually supports `storage_input_output16` - let storage_input_output16_supported = phd_features - ._16bit_storage - .as_ref() - .map(|features| features.storage_input_output16 != 0) - .unwrap_or(false); - Some( vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) - .storage_input_output16(storage_input_output16_supported) + .storage_input_output16( + phd_features.supports_storage_input_output_16(), + ) .uniform_and_storage_buffer16_bit_access(true), ) } else { @@ -2138,15 +2140,8 @@ impl super::Adapter { spv::ZeroInitializeWorkgroupMemoryMode::Polyfill }, force_loop_bounding: true, - use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) && { - // Check if the device actually supports storage_input_output16 - let phd_features = self.physical_device_features(enabled_extensions, features); - phd_features - ._16bit_storage - .as_ref() - .map(|storage_features| storage_features.storage_input_output16 != 0) - .unwrap_or(false) - }, + use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) + && self.phd_features.supports_storage_input_output_16(), // We need to build this separately for each invocation, so just default it out here binding_map: BTreeMap::default(), debug_info: None, From c2eb34c928b687073fdc8b04c8a13605faad8f93 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 
2025 22:07:43 -0400 Subject: [PATCH 17/20] fmt --- wgpu-hal/src/vulkan/adapter.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index ce840590c36..a51312a8030 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -406,9 +406,7 @@ impl PhysicalDeviceFeatures { Some( vk::PhysicalDevice16BitStorageFeatures::default() .storage_buffer16_bit_access(true) - .storage_input_output16( - phd_features.supports_storage_input_output_16(), - ) + .storage_input_output16(phd_features.supports_storage_input_output_16()) .uniform_and_storage_buffer16_bit_access(true), ) } else { From bd259cdc9708ca4ffe92b642880d002640c72dca Mon Sep 17 00:00:00 2001 From: cryvosh Date: Thu, 7 Aug 2025 23:50:21 -0400 Subject: [PATCH 18/20] Changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dd31e77071..60d8796a400 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,7 +61,8 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913). #### Naga -Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950). +- Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950). +- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884) ### Bug Fixes @@ -161,7 +162,6 @@ By @Vecvec in [#7829](https://github.com/gfx-rs/wgpu/pull/7829). 
- Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643). - Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683). - Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658) -- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884) ### General From 520f9878f59376309441ddefe4730e0491ddf7b1 Mon Sep 17 00:00:00 2001 From: cryvosh Date: Sun, 10 Aug 2025 14:41:14 -0400 Subject: [PATCH 19/20] Add test --- naga/src/back/spv/f16_polyfill.rs | 4 +- naga/tests/in/wgsl/f16-native.wgsl | 8 ++ naga/tests/in/wgsl/f16-polyfill.wgsl | 8 ++ naga/tests/out/spv/wgsl-f16-native.spvasm | 110 +++++++++++++++++- naga/tests/out/spv/wgsl-f16-polyfill.spvasm | 118 +++++++++++++++++++- 5 files changed, 242 insertions(+), 6 deletions(-) diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs index ee391f7f245..824490265af 100644 --- a/naga/src/back/spv/f16_polyfill.rs +++ b/naga/src/back/spv/f16_polyfill.rs @@ -1,6 +1,6 @@ /*! -This module provides functionality polyfills `f16` input/output variables when the -`StorageInputOutput16` capability is not available or disabled. +This module provides functionality for polyfilling `f16` input/output variables +when the `StorageInputOutput16` capability is not available or disabled. 
It works by: diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl index 2dea0baaa29..fda726df765 100644 --- a/naga/tests/in/wgsl/f16-native.wgsl +++ b/naga/tests/in/wgsl/f16-native.wgsl @@ -69,3 +69,11 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 { input.scalar_f16 = 0.0h; return input.scalar_f16; } + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +} \ No newline at end of file diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl index 2dea0baaa29..fda726df765 100644 --- a/naga/tests/in/wgsl/f16-polyfill.wgsl +++ b/naga/tests/in/wgsl/f16-polyfill.wgsl @@ -69,3 +69,11 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 { input.scalar_f16 = 0.0h; return input.scalar_f16; } + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +} \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm index 78f1b0d9b58..43210270933 100644 --- a/naga/tests/out/spv/wgsl-f16-native.spvasm +++ b/naga/tests/out/spv/wgsl-f16-native.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 273 +; Bound: 318 OpCapability Shader OpCapability Float16 OpCapability StorageBuffer16BitAccess @@ -14,10 +14,12 @@ OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40 OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135 OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198 OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254 %256 %258 %260 %262 %264 +OpEntryPoint Fragment 
%299 "test_component_access" %275 %277 %279 %281 %283 %285 %287 %289 %291 %292 %293 %294 %295 %296 %297 %298 OpExecutionMode %54 OriginUpperLeft OpExecutionMode %136 OriginUpperLeft OpExecutionMode %199 OriginUpperLeft OpExecutionMode %265 OriginUpperLeft +OpExecutionMode %299 OriginUpperLeft %3 = OpString "f16-native.wgsl" OpSource Unknown 0 %3 "enable f16; @@ -90,7 +92,14 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 { input.scalar_f16 = 0.0h; return input.scalar_f16; } -" + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +}" OpMemberName %12 0 "scalar_f16" OpMemberName %12 1 "scalar_f32" OpMemberName %12 2 "vec2_f16" @@ -165,6 +174,24 @@ OpName %260 "vec4_f16" OpName %262 "vec4_f32" OpName %265 "test_return_partial" OpName %267 "input" +OpName %275 "scalar_f16" +OpName %277 "scalar_f32" +OpName %279 "vec2_f16" +OpName %281 "vec2_f32" +OpName %283 "vec3_f16" +OpName %285 "vec3_f32" +OpName %287 "vec4_f16" +OpName %289 "vec4_f32" +OpName %291 "scalar_f16" +OpName %292 "scalar_f32" +OpName %293 "vec2_f16" +OpName %294 "vec2_f32" +OpName %295 "vec3_f16" +OpName %296 "vec3_f32" +OpName %297 "vec4_f16" +OpName %298 "vec4_f32" +OpName %299 "test_component_access" +OpName %300 "output" OpMemberDecorate %12 0 Offset 0 OpMemberDecorate %12 1 Offset 4 OpMemberDecorate %12 2 Offset 8 @@ -230,6 +257,22 @@ OpDecorate %258 Location 5 OpDecorate %260 Location 6 OpDecorate %262 Location 7 OpDecorate %264 Location 0 +OpDecorate %275 Location 0 +OpDecorate %277 Location 1 +OpDecorate %279 Location 2 +OpDecorate %281 Location 3 +OpDecorate %283 Location 4 +OpDecorate %285 Location 5 +OpDecorate %287 Location 6 +OpDecorate %289 Location 7 +OpDecorate %291 Location 0 +OpDecorate %292 Location 1 +OpDecorate %293 Location 2 +OpDecorate %294 Location 3 +OpDecorate %295 Location 4 +OpDecorate %296 Location 5 +OpDecorate %297 Location 6 
+OpDecorate %298 Location 7 %2 = OpTypeVoid %4 = OpTypeFloat 16 %5 = OpTypeFloat 32 @@ -346,6 +389,23 @@ OpDecorate %264 Location 0 %264 = OpVariable %39 Output %266 = OpConstant %4 0 %268 = OpConstantNull %12 +%275 = OpVariable %15 Input +%277 = OpVariable %18 Input +%279 = OpVariable %21 Input +%281 = OpVariable %24 Input +%283 = OpVariable %27 Input +%285 = OpVariable %30 Input +%287 = OpVariable %33 Input +%289 = OpVariable %36 Input +%291 = OpVariable %39 Output +%292 = OpVariable %41 Output +%293 = OpVariable %43 Output +%294 = OpVariable %45 Output +%295 = OpVariable %47 Output +%296 = OpVariable %49 Output +%297 = OpVariable %51 Output +%298 = OpVariable %53 Output +%301 = OpConstantNull %12 %54 = OpFunction %2 None %55 %13 = OpLabel %64 = OpVariable %65 Function %66 @@ -652,4 +712,50 @@ OpLine %3 70 12 %272 = OpLoad %4 %271 OpStore %264 %272 OpReturn +OpFunctionEnd +%299 = OpFunction %2 None %55 +%273 = OpLabel +%300 = OpVariable %65 Function %301 +%276 = OpLoad %4 %275 +%278 = OpLoad %5 %277 +%280 = OpLoad %6 %279 +%282 = OpLoad %7 %281 +%284 = OpLoad %8 %283 +%286 = OpLoad %9 %285 +%288 = OpLoad %10 %287 +%290 = OpLoad %11 %289 +%274 = OpCompositeConstruct %12 %276 %278 %280 %282 %284 %286 %288 %290 +OpBranch %302 +%302 = OpLabel +OpLine %3 76 5 +%303 = OpCompositeExtract %6 %274 2 +%304 = OpCompositeExtract %4 %303 1 +OpLine %3 76 5 +%305 = OpAccessChain %68 %300 %79 %70 +OpStore %305 %304 +OpLine %3 77 5 +%306 = OpCompositeExtract %6 %274 2 +%307 = OpCompositeExtract %4 %306 0 +OpLine %3 77 5 +%308 = OpAccessChain %68 %300 %79 %75 +OpStore %308 %307 +OpLine %3 1 1 +%309 = OpLoad %12 %300 +%310 = OpCompositeExtract %4 %309 0 +OpStore %291 %310 +%311 = OpCompositeExtract %5 %309 1 +OpStore %292 %311 +%312 = OpCompositeExtract %6 %309 2 +OpStore %293 %312 +%313 = OpCompositeExtract %7 %309 3 +OpStore %294 %313 +%314 = OpCompositeExtract %8 %309 4 +OpStore %295 %314 +%315 = OpCompositeExtract %9 %309 5 +OpStore %296 %315 +%316 = OpCompositeExtract %10 %309 
6 +OpStore %297 %316 +%317 = OpCompositeExtract %11 %309 7 +OpStore %298 %317 +OpReturn OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm index 8b6b111750f..d673816a486 100644 --- a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm +++ b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm @@ -1,7 +1,7 @@ ; SPIR-V ; Version: 1.1 ; Generator: rspirv -; Bound: 294 +; Bound: 347 OpCapability Shader OpCapability Float16 OpCapability StorageBuffer16BitAccess @@ -13,10 +13,12 @@ OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40 OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139 OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210 OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284 +OpEntryPoint Fragment %324 "test_component_access" %296 %299 %301 %304 %306 %309 %311 %314 %316 %317 %318 %319 %320 %321 %322 %323 OpExecutionMode %50 OriginUpperLeft OpExecutionMode %140 OriginUpperLeft OpExecutionMode %211 OriginUpperLeft OpExecutionMode %285 OriginUpperLeft +OpExecutionMode %324 OriginUpperLeft %3 = OpString "f16-polyfill.wgsl" OpSource Unknown 0 %3 "enable f16; @@ -89,7 +91,14 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 { input.scalar_f16 = 0.0h; return input.scalar_f16; } -" + +@fragment +fn test_component_access(input: F16IO) -> F16IO { + var output: F16IO; + output.vec2_f16.x = input.vec2_f16.y; + output.vec2_f16.y = input.vec2_f16.x; + return output; +}" OpMemberName %12 0 "scalar_f16" OpMemberName %12 1 "scalar_f32" OpMemberName %12 2 "vec2_f16" @@ -164,6 +173,24 @@ OpName %279 "vec4_f16" OpName %282 "vec4_f32" OpName %285 "test_return_partial" OpName %287 "input" +OpName %296 "scalar_f16" +OpName %299 "scalar_f32" +OpName %301 "vec2_f16" +OpName %304 
"vec2_f32" +OpName %306 "vec3_f16" +OpName %309 "vec3_f32" +OpName %311 "vec4_f16" +OpName %314 "vec4_f32" +OpName %316 "scalar_f16" +OpName %317 "scalar_f32" +OpName %318 "vec2_f16" +OpName %319 "vec2_f32" +OpName %320 "vec3_f16" +OpName %321 "vec3_f32" +OpName %322 "vec4_f16" +OpName %323 "vec4_f32" +OpName %324 "test_component_access" +OpName %325 "output" OpMemberDecorate %12 0 Offset 0 OpMemberDecorate %12 1 Offset 4 OpMemberDecorate %12 2 Offset 8 @@ -229,6 +256,22 @@ OpDecorate %277 Location 5 OpDecorate %279 Location 6 OpDecorate %282 Location 7 OpDecorate %284 Location 0 +OpDecorate %296 Location 0 +OpDecorate %299 Location 1 +OpDecorate %301 Location 2 +OpDecorate %304 Location 3 +OpDecorate %306 Location 4 +OpDecorate %309 Location 5 +OpDecorate %311 Location 6 +OpDecorate %314 Location 7 +OpDecorate %316 Location 0 +OpDecorate %317 Location 1 +OpDecorate %318 Location 2 +OpDecorate %319 Location 3 +OpDecorate %320 Location 4 +OpDecorate %321 Location 5 +OpDecorate %322 Location 6 +OpDecorate %323 Location 7 %2 = OpTypeVoid %4 = OpTypeFloat 16 %5 = OpTypeFloat 32 @@ -337,6 +380,23 @@ OpDecorate %284 Location 0 %284 = OpVariable %39 Output %286 = OpConstant %4 0 %288 = OpConstantNull %12 +%296 = OpVariable %15 Input +%299 = OpVariable %15 Input +%301 = OpVariable %21 Input +%304 = OpVariable %21 Input +%306 = OpVariable %27 Input +%309 = OpVariable %27 Input +%311 = OpVariable %33 Input +%314 = OpVariable %33 Input +%316 = OpVariable %39 Output +%317 = OpVariable %39 Output +%318 = OpVariable %42 Output +%319 = OpVariable %42 Output +%320 = OpVariable %45 Output +%321 = OpVariable %45 Output +%322 = OpVariable %48 Output +%323 = OpVariable %48 Output +%326 = OpConstantNull %12 %50 = OpFunction %2 None %51 %13 = OpLabel %60 = OpVariable %61 Function %62 @@ -672,4 +732,58 @@ OpLine %3 70 12 %293 = OpFConvert %5 %292 OpStore %284 %293 OpReturn +OpFunctionEnd +%324 = OpFunction %2 None %51 +%294 = OpLabel +%325 = OpVariable %61 Function %326 +%297 = OpLoad %5 
%296 +%298 = OpFConvert %4 %297 +%300 = OpLoad %5 %299 +%302 = OpLoad %7 %301 +%303 = OpFConvert %6 %302 +%305 = OpLoad %7 %304 +%307 = OpLoad %9 %306 +%308 = OpFConvert %8 %307 +%310 = OpLoad %9 %309 +%312 = OpLoad %11 %311 +%313 = OpFConvert %10 %312 +%315 = OpLoad %11 %314 +%295 = OpCompositeConstruct %12 %298 %300 %303 %305 %308 %310 %313 %315 +OpBranch %327 +%327 = OpLabel +OpLine %3 76 5 +%328 = OpCompositeExtract %6 %295 2 +%329 = OpCompositeExtract %4 %328 1 +OpLine %3 76 5 +%330 = OpAccessChain %64 %325 %75 %66 +OpStore %330 %329 +OpLine %3 77 5 +%331 = OpCompositeExtract %6 %295 2 +%332 = OpCompositeExtract %4 %331 0 +OpLine %3 77 5 +%333 = OpAccessChain %64 %325 %75 %71 +OpStore %333 %332 +OpLine %3 1 1 +%334 = OpLoad %12 %325 +%335 = OpCompositeExtract %4 %334 0 +%336 = OpFConvert %5 %335 +OpStore %316 %336 +%337 = OpCompositeExtract %5 %334 1 +OpStore %317 %337 +%338 = OpCompositeExtract %6 %334 2 +%339 = OpFConvert %7 %338 +OpStore %318 %339 +%340 = OpCompositeExtract %7 %334 3 +OpStore %319 %340 +%341 = OpCompositeExtract %8 %334 4 +%342 = OpFConvert %9 %341 +OpStore %320 %342 +%343 = OpCompositeExtract %9 %334 5 +OpStore %321 %343 +%344 = OpCompositeExtract %10 %334 6 +%345 = OpFConvert %11 %344 +OpStore %322 %345 +%346 = OpCompositeExtract %11 %334 7 +OpStore %323 %346 +OpReturn OpFunctionEnd \ No newline at end of file From 00a72d31dde191da3f4a78c04cbb97a030be8b7e Mon Sep 17 00:00:00 2001 From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com> Date: Thu, 21 Aug 2025 10:32:26 +0200 Subject: [PATCH 20/20] rem unneeded change --- naga/src/back/spv/block.rs | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index fa6768d3c33..6f4e5d12574 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2313,21 +2313,6 @@ impl BlockContext<'_> { match self.write_access_chain(pointer, block, access_type_adjustment)? 
{ ExpressionPointer::Ready { pointer_id } => { let id = self.gen_id(); - - if self - .writer - .io_f16_polyfills - .get_f32_io_type(pointer_id) - .is_some() - { - let converted = self.writer.load_io_with_f16_polyfill( - &mut block.body, - pointer_id, - result_type_id, - ); - return Ok(converted); - } - let atomic_space = match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) { crate::TypeInner::Pointer { base, space } => {