From 1e59af159248ceaad49043d9ac573ab691ca46d3 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 1 May 2025 21:59:32 +0100
Subject: [PATCH 01/20] Fix shaderf16 support on vulkan/nvidia

---
 wgpu-hal/src/vulkan/adapter.rs | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 419f8eee503..38e3c78991f 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -387,12 +387,22 @@ impl PhysicalDeviceFeatures {
                 None
             },
             shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) {
+                let supported_f16_features = _phd_features.shader_float16.map(|(_, s)| s)
+                    .unwrap_or_default();
+
                 Some((
                     vk::PhysicalDeviceShaderFloat16Int8Features::default().shader_float16(true),
+                    // Only enable the 16bit storage features that are actually supported by the hardware
                     vk::PhysicalDevice16BitStorageFeatures::default()
-                        .storage_buffer16_bit_access(true)
-                        .storage_input_output16(true)
-                        .uniform_and_storage_buffer16_bit_access(true),
+                        .storage_buffer16_bit_access(
+                            supported_f16_features.storage_buffer16_bit_access != 0
+                        )
+                        .uniform_and_storage_buffer16_bit_access(
+                            supported_f16_features.uniform_and_storage_buffer16_bit_access != 0
+                        )
+                        .storage_input_output16(
+                            supported_f16_features.storage_input_output16 != 0
+                        ),
                 ))
             } else {
                 None
@@ -723,7 +733,6 @@ impl PhysicalDeviceFeatures {
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
                     && bit16.uniform_and_storage_buffer16_bit_access != 0
-                    && bit16.storage_input_output16 != 0,
             );
         }
 

From 3e0ee6c40fae5d0f76f1d4448cb19c598c6065e7 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 1 May 2025 22:16:54 +0100
Subject: [PATCH 02/20] fmt

---
 wgpu-hal/src/vulkan/adapter.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 38e3c78991f..c4e7319c268 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -387,7 +387,9 @@ impl PhysicalDeviceFeatures {
                 None
             },
             shader_float16: if requested_features.contains(wgt::Features::SHADER_F16) {
-                let supported_f16_features = _phd_features.shader_float16.map(|(_, s)| s)
+                let supported_f16_features = _phd_features
+                    .shader_float16
+                    .map(|(_, s)| s)
                     .unwrap_or_default();
 
                 Some((
@@ -395,14 +397,12 @@ impl PhysicalDeviceFeatures {
                     // Only enable the 16bit storage features that are actually supported by the hardware
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(
-                            supported_f16_features.storage_buffer16_bit_access != 0
+                            supported_f16_features.storage_buffer16_bit_access != 0,
                         )
                         .uniform_and_storage_buffer16_bit_access(
-                            supported_f16_features.uniform_and_storage_buffer16_bit_access != 0
+                            supported_f16_features.uniform_and_storage_buffer16_bit_access != 0,
                         )
-                        .storage_input_output16(
-                            supported_f16_features.storage_input_output16 != 0
-                        ),
+                        .storage_input_output16(supported_f16_features.storage_input_output16 != 0),
                 ))
             } else {
                 None
@@ -732,7 +732,7 @@ impl PhysicalDeviceFeatures {
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
-                    && bit16.uniform_and_storage_buffer16_bit_access != 0
+                    && bit16.uniform_and_storage_buffer16_bit_access != 0,
             );
         }
 

From 888c46144ad1a33ff8245669eef9abc2df9d03ef Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sat, 5 Jul 2025 08:45:18 -0400
Subject: [PATCH 03/20] F16 polyfill

---
 Cargo.lock                                  |  24 +-
 naga/src/back/spv/block.rs                  |  30 +-
 naga/src/back/spv/mod.rs                    |  11 +
 naga/src/back/spv/polyfill.rs               | 108 ++++
 naga/src/back/spv/writer.rs                 |  92 ++-
 naga/tests/in/wgsl/f16-native.toml          |  13 +
 naga/tests/in/wgsl/f16-native.wgsl          |  71 ++
 naga/tests/in/wgsl/f16-polyfill.toml        |  13 +
 naga/tests/in/wgsl/f16-polyfill.wgsl        |  71 ++
 naga/tests/naga/snapshots.rs                |  21 +-
 naga/tests/naga/spirv_capabilities.rs       | 135 ++++
 naga/tests/out/spv/wgsl-f16-native.spvasm   | 655 +++++++++++++++++++
 naga/tests/out/spv/wgsl-f16-polyfill.spvasm | 675 ++++++++++++++++++++
 wgpu-hal/src/vulkan/adapter.rs              |  24 +-
 14 files changed, 1912 insertions(+), 31 deletions(-)
 create mode 100644 naga/src/back/spv/polyfill.rs
 create mode 100644 naga/tests/in/wgsl/f16-native.toml
 create mode 100644 naga/tests/in/wgsl/f16-native.wgsl
 create mode 100644 naga/tests/in/wgsl/f16-polyfill.toml
 create mode 100644 naga/tests/in/wgsl/f16-polyfill.wgsl
 create mode 100644 naga/tests/out/spv/wgsl-f16-native.spvasm
 create mode 100644 naga/tests/out/spv/wgsl-f16-polyfill.spvasm

diff --git a/Cargo.lock b/Cargo.lock
index 5f75584314f..ac5b8a0bb28 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -799,15 +799,15 @@ dependencies = [
 
 [[package]]
 name = "console"
-version = "0.15.11"
+version = "0.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8"
+checksum = "2e09ced7ebbccb63b4c65413d821f2e00ce54c5ca4514ddc6b3c892fdbcbc69d"
 dependencies = [
  "encode_unicode",
  "libc",
  "once_cell",
  "unicode-width",
- "windows-sys 0.59.0",
+ "windows-sys 0.60.2",
 ]
 
 [[package]]
@@ -2090,14 +2090,14 @@ dependencies = [
 
 [[package]]
 name = "indicatif"
-version = "0.17.11"
+version = "0.17.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
+checksum = "4adb2ee6ad319a912210a36e56e3623555817bcc877a7e6e8802d1d69c4d8056"
 dependencies = [
  "console",
- "number_prefix",
  "portable-atomic",
  "unicode-width",
+ "unit-prefix",
  "web-time 1.1.0",
 ]
 
@@ -2720,12 +2720,6 @@ dependencies = [
  "syn",
 ]
 
-[[package]]
-name = "number_prefix"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
-
 [[package]]
 name = "nv-flip"
 version = "0.1.2"
@@ -4424,6 +4418,12 @@ version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
+[[package]]
+name = "unit-prefix"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "323402cff2dd658f39ca17c789b502021b3f18707c91cdf22e3838e1b4023817"
+
 [[package]]
 name = "url"
 version = "2.5.4"
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 7ec659e1d90..5de6e91c616 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -237,7 +237,18 @@ impl Writer {
                 }
             };
 
-            body.push(Instruction::store(res_member.id, member_value_id, None));
+            if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
+                let converted = self.id_gen.next();
+                super::polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
+                    member_value_id,
+                    f32_ty,
+                    converted,
+                    body,
+                );
+                body.push(Instruction::store(res_member.id, converted, None));
+            } else {
+                body.push(Instruction::store(res_member.id, member_value_id, None));
+            }
 
             match res_member.built_in {
                 Some(crate::BuiltIn::Position { .. })
@@ -2313,6 +2324,23 @@ impl BlockContext<'_> {
         match self.write_access_chain(pointer, block, access_type_adjustment)? {
             ExpressionPointer::Ready { pointer_id } => {
                 let id = self.gen_id();
+
+                if let Some((f32_ty, _)) =
+                    self.writer.io_f16_polyfills.get_polyfill_info(pointer_id)
+                {
+                    block
+                        .body
+                        .push(Instruction::load(f32_ty, id, pointer_id, None));
+                    let converted = self.gen_id();
+                    super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                        id,
+                        result_type_id,
+                        converted,
+                        &mut block.body,
+                    );
+                    return Ok(converted);
+                }
+
                 let atomic_space =
                     match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) {
                         crate::TypeInner::Pointer { base, space } => {
diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs
index 2dcd95957d7..5a37044fe47 100644
--- a/naga/src/back/spv/mod.rs
+++ b/naga/src/back/spv/mod.rs
@@ -10,6 +10,7 @@ mod image;
 mod index;
 mod instructions;
 mod layout;
+mod polyfill;
 mod ray;
 mod recyclable;
 mod selection;
@@ -744,6 +745,7 @@ pub struct Writer {
     bounds_check_policies: BoundsCheckPolicies,
     zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode,
     force_loop_bounding: bool,
+    use_storage_input_output_16: bool,
     void_type: Word,
     //TODO: convert most of these into vectors, addressable by handle indices
     lookup_type: crate::FastHashMap<LookupType, Word>,
@@ -770,6 +772,10 @@ pub struct Writer {
 
     ray_get_committed_intersection_function: Option<Word>,
     ray_get_candidate_intersection_function: Option<Word>,
+
+    /// F16 I/O polyfill manager for handling f16 input/output variables
+    /// when StorageInputOutput16 capability is not available.
+    io_f16_polyfills: polyfill::F16IoPolyfill,
 }
 
 bitflags::bitflags! {
@@ -852,6 +858,10 @@ pub struct Options<'a> {
     /// to think the number of iterations is bounded.
     pub force_loop_bounding: bool,
 
+    /// Whether to use the StorageInputOutput16 capability for f16 shader I/O.
+    /// When false, f16 I/O is polyfilled using f32 types with conversions.
+    pub use_storage_input_output_16: bool,
+
     pub debug_info: Option<DebugInfo<'a>>,
 }
 
@@ -871,6 +881,7 @@ impl Default for Options<'_> {
             bounds_check_policies: BoundsCheckPolicies::default(),
             zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill,
             force_loop_bounding: true,
+            use_storage_input_output_16: true,
             debug_info: None,
         }
     }
diff --git a/naga/src/back/spv/polyfill.rs b/naga/src/back/spv/polyfill.rs
new file mode 100644
index 00000000000..9c50444620d
--- /dev/null
+++ b/naga/src/back/spv/polyfill.rs
@@ -0,0 +1,108 @@
+/*!
+This module provides functionality polyfills f16 input/output variables
+when the StorageInputOutput16 capability is not available or disabled.
+
+It works by:
+1. Declaring f16 I/O variables as f32 in SPIR-V
+2. Converting between f16 and f32 at runtime using OpFConvert
+3. Maintaining mappings to track which variables need conversion
+*/
+
+use crate::back::spv::{Instruction, LocalType, NumericType, Word};
+use std::vec::Vec;
+
+/// Manages f16 I/O polyfill state and operations.
+#[derive(Default)]
+pub(super) struct F16IoPolyfill {
+    use_native: bool,
+    variable_map: crate::FastHashMap<Word, (Word, Word)>,
+}
+
+impl F16IoPolyfill {
+    pub fn new(use_storage_input_output_16: bool) -> Self {
+        Self {
+            use_native: use_storage_input_output_16,
+            variable_map: crate::FastHashMap::default(),
+        }
+    }
+
+    pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
+        use crate::{ScalarKind as Sk, TypeInner};
+
+        !self.use_native
+            && match *ty_inner {
+                TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true,
+                TypeInner::Vector { scalar, .. }
+                    if scalar.kind == Sk::Float && scalar.width == 2 =>
+                {
+                    true
+                }
+                _ => false,
+            }
+    }
+
+    pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) {
+        self.variable_map
+            .insert(variable_id, (f32_type_id, f16_type_id));
+    }
+
+    pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> {
+        self.variable_map.get(&variable_id).copied()
+    }
+
+    pub fn emit_f16_to_f32_conversion(
+        f16_value_id: Word,
+        f32_type_id: Word,
+        converted_id: Word,
+        body: &mut Vec<Instruction>,
+    ) {
+        body.push(Instruction::unary(
+            spirv::Op::FConvert,
+            f32_type_id,
+            converted_id,
+            f16_value_id,
+        ));
+    }
+
+    pub fn emit_f32_to_f16_conversion(
+        f32_value_id: Word,
+        f16_type_id: Word,
+        converted_id: Word,
+        body: &mut Vec<Instruction>,
+    ) {
+        body.push(Instruction::unary(
+            spirv::Op::FConvert,
+            f16_type_id,
+            converted_id,
+            f32_value_id,
+        ));
+    }
+
+    pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option<LocalType> {
+        use crate::{ScalarKind as Sk, TypeInner};
+
+        match *ty_inner {
+            TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => {
+                Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32)))
+            }
+            TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => {
+                Some(LocalType::Numeric(NumericType::Vector {
+                    size,
+                    scalar: crate::Scalar::F32,
+                }))
+            }
+            _ => None,
+        }
+    }
+
+    pub fn clear(&mut self) {
+        self.variable_map.clear();
+    }
+}
+
+impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
+    fn recycle(mut self) -> Self {
+        self.variable_map = self.variable_map.recycle();
+        self
+    }
+}
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index b61747c8326..b9c530fbf8f 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -78,6 +78,7 @@ impl Writer {
             bounds_check_policies: options.bounds_check_policies,
             zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory,
             force_loop_bounding: options.force_loop_bounding,
+            use_storage_input_output_16: options.use_storage_input_output_16,
             void_type,
             lookup_type: crate::FastHashMap::default(),
             lookup_function: crate::FastHashMap::default(),
@@ -92,6 +93,9 @@ impl Writer {
             temp_list: Vec::new(),
             ray_get_committed_intersection_function: None,
             ray_get_candidate_intersection_function: None,
+            io_f16_polyfills: super::polyfill::F16IoPolyfill::new(
+                options.use_storage_input_output_16,
+            ),
         })
     }
 
@@ -125,6 +129,7 @@ impl Writer {
             bounds_check_policies: self.bounds_check_policies,
             zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
             force_loop_bounding: self.force_loop_bounding,
+            use_storage_input_output_16: self.use_storage_input_output_16,
             capabilities_available: take(&mut self.capabilities_available),
             binding_map: take(&mut self.binding_map),
 
@@ -151,6 +156,7 @@ impl Writer {
             temp_list: take(&mut self.temp_list).recycle(),
             ray_get_candidate_intersection_function: None,
             ray_get_committed_intersection_function: None,
+            io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(),
         };
 
         *self = fresh;
@@ -726,10 +732,28 @@ impl Writer {
                         binding,
                     )?;
                     iface.varying_ids.push(varying_id);
-                    let id = self.id_gen.next();
-                    prelude
-                        .body
-                        .push(Instruction::load(argument_type_id, id, varying_id, None));
+                    let mut id = self.id_gen.next();
+
+                    if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
+                        prelude
+                            .body
+                            .push(Instruction::load(f32_ty, id, varying_id, None));
+                        let converted = self.id_gen.next();
+                        super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                            id,
+                            argument_type_id,
+                            converted,
+                            &mut prelude.body,
+                        );
+                        id = converted;
+                    } else {
+                        prelude.body.push(Instruction::load(
+                            argument_type_id,
+                            id,
+                            varying_id,
+                            None,
+                        ));
+                    }
 
                     if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
                         local_invocation_id = Some(id);
@@ -755,10 +779,26 @@ impl Writer {
                         )?;
                         iface.varying_ids.push(varying_id);
                         let id = self.id_gen.next();
-                        prelude
-                            .body
-                            .push(Instruction::load(type_id, id, varying_id, None));
-                        constituent_ids.push(id);
+                        if let Some((f32_ty, _)) =
+                            self.io_f16_polyfills.get_polyfill_info(varying_id)
+                        {
+                            prelude
+                                .body
+                                .push(Instruction::load(f32_ty, id, varying_id, None));
+                            let converted = self.id_gen.next();
+                            super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                                id,
+                                type_id,
+                                converted,
+                                &mut prelude.body,
+                            );
+                            constituent_ids.push(converted);
+                        } else {
+                            prelude
+                                .body
+                                .push(Instruction::load(type_id, id, varying_id, None));
+                            constituent_ids.push(id);
+                        }
 
                         if binding == &crate::Binding::BuiltIn(crate::BuiltIn::GlobalInvocationId) {
                             local_invocation_id = Some(id);
@@ -1220,8 +1260,10 @@ impl Writer {
                         .insert(spirv::Capability::StorageBuffer16BitAccess);
                     self.capabilities_used
                         .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
-                    self.capabilities_used
-                        .insert(spirv::Capability::StorageInputOutput16);
+                    if self.use_storage_input_output_16 {
+                        self.capabilities_used
+                            .insert(spirv::Capability::StorageInputOutput16);
+                    }
                 }
                 Instruction::type_float(id, bits)
             }
@@ -1904,8 +1946,27 @@ impl Writer {
         ty: Handle<crate::Type>,
         binding: &crate::Binding,
     ) -> Result<Word, Error> {
+        use crate::TypeInner;
+
         let id = self.id_gen.next();
-        let pointer_type_id = self.get_handle_pointer_type_id(ty, class);
+        let ty_inner = &ir_module.types[ty].inner;
+        let needs_polyfill = self.needs_f16_polyfill(ty_inner);
+
+        let pointer_type_id = if needs_polyfill {
+            let f32_value_local = super::polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
+                .expect("needs_polyfill returned true but create_polyfill_type returned None");
+
+            let f32_type_id = self.get_localtype_id(f32_value_local);
+            let ptr_id = self.get_pointer_type_id(f32_type_id, class);
+            let f16_type_id = self.get_handle_type_id(ty);
+            self.io_f16_polyfills
+                .register_variable(id, f32_type_id, f16_type_id);
+
+            ptr_id
+        } else {
+            self.get_handle_pointer_type_id(ty, class)
+        };
+
         Instruction::variable(pointer_type_id, id, class, None)
             .to_words(&mut self.logical_layout.declarations);
 
@@ -2088,8 +2149,9 @@ impl Writer {
                 // > shader, must be decorated Flat
                 if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment {
                     let is_flat = match ir_module.types[ty].inner {
-                        crate::TypeInner::Scalar(scalar)
-                        | crate::TypeInner::Vector { scalar, .. } => match scalar.kind {
+                        TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar
+                            .kind
+                        {
                             Sk::Uint | Sk::Sint | Sk::Bool => true,
                             Sk::Float => false,
                             Sk::AbstractInt | Sk::AbstractFloat => {
@@ -2584,6 +2646,10 @@ impl Writer {
         self.decorate(id, spirv::Decoration::NonUniform, &[]);
         Ok(())
     }
+
+    pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
+        self.io_f16_polyfills.needs_polyfill(ty_inner)
+    }
 }
 
 #[test]
diff --git a/naga/tests/in/wgsl/f16-native.toml b/naga/tests/in/wgsl/f16-native.toml
new file mode 100644
index 00000000000..529d34f80da
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-native.toml
@@ -0,0 +1,13 @@
+targets = "SPIRV"
+god_mode = true
+
+[spv]
+debug = true
+version = [1, 1]
+use_storage_input_output_16 = true
+capabilities = ["Float16"]
+
+[bounds_check_policies]
+index = "ReadZeroSkipWrite"
+buffer = "ReadZeroSkipWrite"
+image = "ReadZeroSkipWrite"
diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl
new file mode 100644
index 00000000000..2dea0baaa29
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-native.wgsl
@@ -0,0 +1,71 @@
+enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
diff --git a/naga/tests/in/wgsl/f16-polyfill.toml b/naga/tests/in/wgsl/f16-polyfill.toml
new file mode 100644
index 00000000000..96160063e05
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-polyfill.toml
@@ -0,0 +1,13 @@
+targets = "SPIRV"
+god_mode = true
+
+[spv]
+debug = true
+version = [1, 1]
+use_storage_input_output_16 = false
+capabilities = ["Float16"]
+
+[bounds_check_policies]
+index = "ReadZeroSkipWrite"
+buffer = "ReadZeroSkipWrite"
+image = "ReadZeroSkipWrite"
diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl
new file mode 100644
index 00000000000..2dea0baaa29
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-polyfill.wgsl
@@ -0,0 +1,71 @@
+enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
diff --git a/naga/tests/naga/snapshots.rs b/naga/tests/naga/snapshots.rs
index 32e2f5e0285..b00ef579203 100644
--- a/naga/tests/naga/snapshots.rs
+++ b/naga/tests/naga/snapshots.rs
@@ -91,7 +91,7 @@ struct SpirvInParameters {
     adjust_coordinate_space: bool,
 }
 
-#[derive(Default, serde::Deserialize)]
+#[derive(serde::Deserialize)]
 #[serde(default)]
 struct SpirvOutParameters {
     version: SpvOutVersion,
@@ -101,11 +101,29 @@ struct SpirvOutParameters {
     force_point_size: bool,
     clamp_frag_depth: bool,
     separate_entry_points: bool,
+    use_storage_input_output_16: bool,
     #[cfg(all(feature = "deserialize", spv_out))]
     #[serde(deserialize_with = "deserialize_binding_map")]
     binding_map: naga::back::spv::BindingMap,
 }
 
+impl Default for SpirvOutParameters {
+    fn default() -> Self {
+        Self {
+            version: SpvOutVersion::default(),
+            capabilities: naga::FastHashSet::default(),
+            debug: false,
+            adjust_coordinate_space: false,
+            force_point_size: false,
+            clamp_frag_depth: false,
+            separate_entry_points: false,
+            use_storage_input_output_16: true,
+            #[cfg(all(feature = "deserialize", spv_out))]
+            binding_map: naga::back::spv::BindingMap::default(),
+        }
+    }
+}
+
 #[derive(Default, serde::Deserialize)]
 #[serde(default)]
 struct WgslOutParameters {
@@ -617,6 +635,7 @@ fn write_output_spv(
         binding_map: params.binding_map.clone(),
         zero_initialize_workgroup_memory: spv::ZeroInitializeWorkgroupMemoryMode::Polyfill,
         force_loop_bounding: true,
+        use_storage_input_output_16: params.use_storage_input_output_16,
         debug_info,
     };
 
diff --git a/naga/tests/naga/spirv_capabilities.rs b/naga/tests/naga/spirv_capabilities.rs
index 2d46e37f72d..aa99298273d 100644
--- a/naga/tests/naga/spirv_capabilities.rs
+++ b/naga/tests/naga/spirv_capabilities.rs
@@ -6,6 +6,9 @@ Test SPIR-V backend capability checks.
 
 use spirv::Capability as Ca;
 
+#[cfg(spv_out)]
+use rspirv::binary::Disassemble;
+
 fn capabilities_used(source: &str) -> naga::FastIndexSet<Ca> {
     use naga::back::spv;
     use naga::valid;
@@ -213,3 +216,135 @@ fn int64() {
 fn float16() {
     require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
 }
+
+#[test]
+fn f16_io_capabilities() {
+    let source = r#"
+        enable f16;
+        
+        struct VertexOutput {
+            @location(0) color: vec3<f16>,
+        }
+        
+        @fragment  
+        fn main(input: VertexOutput) -> @location(0) vec4<f16> {
+            return vec4<f16>(input.color, f16(1.0));
+        }
+    "#;
+
+    use naga::back::spv;
+    use naga::valid;
+
+    let module = naga::front::wgsl::parse_str(source).unwrap();
+    let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all())
+        .validate(&module)
+        .unwrap();
+
+    // Test native path: use_storage_input_output_16 = true
+    let options_native = spv::Options {
+        use_storage_input_output_16: true,
+        ..Default::default()
+    };
+
+    let mut words_native = vec![];
+    let mut writer_native = spv::Writer::new(&options_native).unwrap();
+    writer_native
+        .write(&module, &info, None, &None, &mut words_native)
+        .unwrap();
+    let caps_native = writer_native.get_capabilities_used();
+
+    // Should include StorageInputOutput16 for native f16 I/O
+    assert!(caps_native.contains(&Ca::StorageInputOutput16));
+
+    // Test polyfill path: use_storage_input_output_16 = false
+    let options_polyfill = spv::Options {
+        use_storage_input_output_16: false,
+        ..Default::default()
+    };
+
+    let mut words_polyfill = vec![];
+    let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap();
+    writer_polyfill
+        .write(&module, &info, None, &None, &mut words_polyfill)
+        .unwrap();
+    let caps_polyfill = writer_polyfill.get_capabilities_used();
+
+    // Should not include StorageInputOutput16 when polyfilled
+    assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16));
+
+    // But should still include the basic f16 capabilities
+    assert!(caps_polyfill.contains(&Ca::Float16));
+}
+
+#[cfg(spv_out)]
+#[test]
+fn f16_io_polyfill_codegen() {
+    let source = r#"
+        enable f16;
+
+        struct F16IO {
+            @location(0) scalar_f16: f16,
+            @location(1) scalar_f32: f32,
+            @location(2) vec2_f16: vec2<f16>,
+            @location(3) vec2_f32: vec2<f32>,
+        }
+
+        @fragment
+        fn main(input: F16IO) -> F16IO {
+            var output = input;
+            output.scalar_f16 = input.scalar_f16 + 1.0h;
+            output.vec2_f16.x = input.vec2_f16.y;
+            return output;
+        }
+    "#;
+
+    use naga::{back::spv, valid};
+
+    let module = naga::front::wgsl::parse_str(source).unwrap();
+    let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all())
+        .validate(&module)
+        .unwrap();
+
+    // Test Native Path
+    let options_native = spv::Options {
+        use_storage_input_output_16: true,
+        ..Default::default()
+    };
+    let mut words_native = vec![];
+    let mut writer_native = spv::Writer::new(&options_native).unwrap();
+    writer_native
+        .write(&module, &info, None, &None, &mut words_native)
+        .unwrap();
+    let caps_native = writer_native.get_capabilities_used();
+    let dis_native = rspirv::dr::load_words(words_native).unwrap().disassemble();
+
+    // Native path must request the capability and must NOT have conversions.
+    assert!(caps_native.contains(&Ca::StorageInputOutput16));
+    assert!(!dis_native.contains("OpFConvert"));
+
+    // Test Polyfill Path
+    let options_polyfill = spv::Options {
+        use_storage_input_output_16: false,
+        ..Default::default()
+    };
+    let mut words_polyfill = vec![];
+    let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap();
+    writer_polyfill
+        .write(&module, &info, None, &None, &mut words_polyfill)
+        .unwrap();
+    let caps_polyfill = writer_polyfill.get_capabilities_used();
+    let dis_polyfill = rspirv::dr::load_words(words_polyfill)
+        .unwrap()
+        .disassemble();
+
+    // Polyfill path must NOT request the capability and must have conversions.
+    assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16));
+    assert!(dis_polyfill.contains("OpFConvert"));
+
+    // Should have 2 input conversions, and 2 output conversions
+    let fconvert_count = dis_polyfill.matches("OpFConvert").count();
+    assert_eq!(
+        fconvert_count, 4,
+        "Expected 4 OpFConvert instructions for polyfilled I/O"
+    );
+}
diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm
new file mode 100644
index 00000000000..096b92bf03c
--- /dev/null
+++ b/naga/tests/out/spv/wgsl-f16-native.spvasm
@@ -0,0 +1,655 @@
+; SPIR-V
+; Version: 1.1
+; Generator: rspirv
+; Bound: 273
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability StorageInputOutput16
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40 %42 %44 %46 %48 %50 %52
+OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135
+OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198
+OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254 %256 %258 %260 %262 %264
+OpExecutionMode %54 OriginUpperLeft
+OpExecutionMode %136 OriginUpperLeft
+OpExecutionMode %199 OriginUpperLeft
+OpExecutionMode %265 OriginUpperLeft
+%3 = OpString "f16-native.wgsl"
+OpSource Unknown 0 %3 "enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
+"
+OpMemberName %12 0 "scalar_f16"
+OpMemberName %12 1 "scalar_f32"
+OpMemberName %12 2 "vec2_f16"
+OpMemberName %12 3 "vec2_f32"
+OpMemberName %12 4 "vec3_f16"
+OpMemberName %12 5 "vec3_f32"
+OpMemberName %12 6 "vec4_f16"
+OpMemberName %12 7 "vec4_f32"
+OpName %12 "F16IO"
+OpName %14 "scalar_f16"
+OpName %17 "scalar_f32"
+OpName %20 "vec2_f16"
+OpName %23 "vec2_f32"
+OpName %26 "vec3_f16"
+OpName %29 "vec3_f32"
+OpName %32 "vec4_f16"
+OpName %35 "vec4_f32"
+OpName %38 "scalar_f16"
+OpName %40 "scalar_f32"
+OpName %42 "vec2_f16"
+OpName %44 "vec2_f32"
+OpName %46 "vec3_f16"
+OpName %48 "vec3_f32"
+OpName %50 "vec4_f16"
+OpName %52 "vec4_f32"
+OpName %54 "test_direct"
+OpName %64 "output"
+OpName %112 "scalar_f16"
+OpName %114 "scalar_f32"
+OpName %116 "vec2_f16"
+OpName %118 "vec2_f32"
+OpName %120 "vec3_f16"
+OpName %122 "vec3_f32"
+OpName %124 "vec4_f16"
+OpName %126 "vec4_f32"
+OpName %128 "scalar_f16"
+OpName %129 "scalar_f32"
+OpName %130 "vec2_f16"
+OpName %131 "vec2_f32"
+OpName %132 "vec3_f16"
+OpName %133 "vec3_f32"
+OpName %134 "vec4_f16"
+OpName %135 "vec4_f32"
+OpName %136 "test_struct"
+OpName %137 "output"
+OpName %175 "scalar_f16"
+OpName %177 "scalar_f32"
+OpName %179 "vec2_f16"
+OpName %181 "vec2_f32"
+OpName %183 "vec3_f16"
+OpName %185 "vec3_f32"
+OpName %187 "vec4_f16"
+OpName %189 "vec4_f32"
+OpName %191 "scalar_f16"
+OpName %192 "scalar_f32"
+OpName %193 "vec2_f16"
+OpName %194 "vec2_f32"
+OpName %195 "vec3_f16"
+OpName %196 "vec3_f32"
+OpName %197 "vec4_f16"
+OpName %198 "vec4_f32"
+OpName %199 "test_copy_input"
+OpName %200 "input"
+OpName %202 "output"
+OpName %248 "scalar_f16"
+OpName %250 "scalar_f32"
+OpName %252 "vec2_f16"
+OpName %254 "vec2_f32"
+OpName %256 "vec3_f16"
+OpName %258 "vec3_f32"
+OpName %260 "vec4_f16"
+OpName %262 "vec4_f32"
+OpName %265 "test_return_partial"
+OpName %267 "input"
+OpMemberDecorate %12 0 Offset 0
+OpMemberDecorate %12 1 Offset 4
+OpMemberDecorate %12 2 Offset 8
+OpMemberDecorate %12 3 Offset 16
+OpMemberDecorate %12 4 Offset 24
+OpMemberDecorate %12 5 Offset 32
+OpMemberDecorate %12 6 Offset 48
+OpMemberDecorate %12 7 Offset 64
+OpDecorate %14 Location 0
+OpDecorate %17 Location 1
+OpDecorate %20 Location 2
+OpDecorate %23 Location 3
+OpDecorate %26 Location 4
+OpDecorate %29 Location 5
+OpDecorate %32 Location 6
+OpDecorate %35 Location 7
+OpDecorate %38 Location 0
+OpDecorate %40 Location 1
+OpDecorate %42 Location 2
+OpDecorate %44 Location 3
+OpDecorate %46 Location 4
+OpDecorate %48 Location 5
+OpDecorate %50 Location 6
+OpDecorate %52 Location 7
+OpDecorate %112 Location 0
+OpDecorate %114 Location 1
+OpDecorate %116 Location 2
+OpDecorate %118 Location 3
+OpDecorate %120 Location 4
+OpDecorate %122 Location 5
+OpDecorate %124 Location 6
+OpDecorate %126 Location 7
+OpDecorate %128 Location 0
+OpDecorate %129 Location 1
+OpDecorate %130 Location 2
+OpDecorate %131 Location 3
+OpDecorate %132 Location 4
+OpDecorate %133 Location 5
+OpDecorate %134 Location 6
+OpDecorate %135 Location 7
+OpDecorate %175 Location 0
+OpDecorate %177 Location 1
+OpDecorate %179 Location 2
+OpDecorate %181 Location 3
+OpDecorate %183 Location 4
+OpDecorate %185 Location 5
+OpDecorate %187 Location 6
+OpDecorate %189 Location 7
+OpDecorate %191 Location 0
+OpDecorate %192 Location 1
+OpDecorate %193 Location 2
+OpDecorate %194 Location 3
+OpDecorate %195 Location 4
+OpDecorate %196 Location 5
+OpDecorate %197 Location 6
+OpDecorate %198 Location 7
+OpDecorate %248 Location 0
+OpDecorate %250 Location 1
+OpDecorate %252 Location 2
+OpDecorate %254 Location 3
+OpDecorate %256 Location 4
+OpDecorate %258 Location 5
+OpDecorate %260 Location 6
+OpDecorate %262 Location 7
+OpDecorate %264 Location 0
+%2 = OpTypeVoid
+%4 = OpTypeFloat 16
+%5 = OpTypeFloat 32
+%6 = OpTypeVector %4 2
+%7 = OpTypeVector %5 2
+%8 = OpTypeVector %4 3
+%9 = OpTypeVector %5 3
+%10 = OpTypeVector %4 4
+%11 = OpTypeVector %5 4
+%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11
+%15 = OpTypePointer Input %4
+%14 = OpVariable  %15  Input
+%18 = OpTypePointer Input %5
+%17 = OpVariable  %18  Input
+%21 = OpTypePointer Input %6
+%20 = OpVariable  %21  Input
+%24 = OpTypePointer Input %7
+%23 = OpVariable  %24  Input
+%27 = OpTypePointer Input %8
+%26 = OpVariable  %27  Input
+%30 = OpTypePointer Input %9
+%29 = OpVariable  %30  Input
+%33 = OpTypePointer Input %10
+%32 = OpVariable  %33  Input
+%36 = OpTypePointer Input %11
+%35 = OpVariable  %36  Input
+%39 = OpTypePointer Output %4
+%38 = OpVariable  %39  Output
+%41 = OpTypePointer Output %5
+%40 = OpVariable  %41  Output
+%43 = OpTypePointer Output %6
+%42 = OpVariable  %43  Output
+%45 = OpTypePointer Output %7
+%44 = OpVariable  %45  Output
+%47 = OpTypePointer Output %8
+%46 = OpVariable  %47  Output
+%49 = OpTypePointer Output %9
+%48 = OpVariable  %49  Output
+%51 = OpTypePointer Output %10
+%50 = OpVariable  %51  Output
+%53 = OpTypePointer Output %11
+%52 = OpVariable  %53  Output
+%55 = OpTypeFunction %2
+%56 = OpConstant  %4  2.1524e-41
+%57 = OpConstant  %5  1.0
+%58 = OpConstantComposite  %6  %56 %56
+%59 = OpConstantComposite  %7  %57 %57
+%60 = OpConstantComposite  %8  %56 %56 %56
+%61 = OpConstantComposite  %9  %57 %57 %57
+%62 = OpConstantComposite  %10  %56 %56 %56 %56
+%63 = OpConstantComposite  %11  %57 %57 %57 %57
+%65 = OpTypePointer Function %12
+%66 = OpConstantNull  %12
+%68 = OpTypePointer Function %4
+%71 = OpTypeInt 32 0
+%70 = OpConstant  %71  0
+%73 = OpTypePointer Function %5
+%75 = OpConstant  %71  1
+%77 = OpTypePointer Function %6
+%79 = OpConstant  %71  2
+%81 = OpTypePointer Function %7
+%83 = OpConstant  %71  3
+%85 = OpTypePointer Function %8
+%87 = OpConstant  %71  4
+%89 = OpTypePointer Function %9
+%91 = OpConstant  %71  5
+%93 = OpTypePointer Function %10
+%95 = OpConstant  %71  6
+%97 = OpTypePointer Function %11
+%99 = OpConstant  %71  7
+%112 = OpVariable  %15  Input
+%114 = OpVariable  %18  Input
+%116 = OpVariable  %21  Input
+%118 = OpVariable  %24  Input
+%120 = OpVariable  %27  Input
+%122 = OpVariable  %30  Input
+%124 = OpVariable  %33  Input
+%126 = OpVariable  %36  Input
+%128 = OpVariable  %39  Output
+%129 = OpVariable  %41  Output
+%130 = OpVariable  %43  Output
+%131 = OpVariable  %45  Output
+%132 = OpVariable  %47  Output
+%133 = OpVariable  %49  Output
+%134 = OpVariable  %51  Output
+%135 = OpVariable  %53  Output
+%138 = OpConstantNull  %12
+%175 = OpVariable  %15  Input
+%177 = OpVariable  %18  Input
+%179 = OpVariable  %21  Input
+%181 = OpVariable  %24  Input
+%183 = OpVariable  %27  Input
+%185 = OpVariable  %30  Input
+%187 = OpVariable  %33  Input
+%189 = OpVariable  %36  Input
+%191 = OpVariable  %39  Output
+%192 = OpVariable  %41  Output
+%193 = OpVariable  %43  Output
+%194 = OpVariable  %45  Output
+%195 = OpVariable  %47  Output
+%196 = OpVariable  %49  Output
+%197 = OpVariable  %51  Output
+%198 = OpVariable  %53  Output
+%201 = OpConstantNull  %12
+%203 = OpConstantNull  %12
+%248 = OpVariable  %15  Input
+%250 = OpVariable  %18  Input
+%252 = OpVariable  %21  Input
+%254 = OpVariable  %24  Input
+%256 = OpVariable  %27  Input
+%258 = OpVariable  %30  Input
+%260 = OpVariable  %33  Input
+%262 = OpVariable  %36  Input
+%264 = OpVariable  %39  Output
+%266 = OpConstant  %4  0.0
+%268 = OpConstantNull  %12
+%54 = OpFunction  %2  None %55
+%13 = OpLabel
+%64 = OpVariable  %65  Function %66
+%16 = OpLoad  %4  %14
+%19 = OpLoad  %5  %17
+%22 = OpLoad  %6  %20
+%25 = OpLoad  %7  %23
+%28 = OpLoad  %8  %26
+%31 = OpLoad  %9  %29
+%34 = OpLoad  %10  %32
+%37 = OpLoad  %11  %35
+OpBranch %67
+%67 = OpLabel
+OpLine %3 15 5
+OpLine %3 15 25
+%69 = OpFAdd  %4  %16 %56
+OpLine %3 15 5
+%72 = OpAccessChain  %68  %64 %70
+OpStore %72 %69
+OpLine %3 16 5
+OpLine %3 16 25
+%74 = OpFAdd  %5  %19 %57
+OpLine %3 16 5
+%76 = OpAccessChain  %73  %64 %75
+OpStore %76 %74
+OpLine %3 17 5
+OpLine %3 17 23
+%78 = OpFAdd  %6  %22 %58
+OpLine %3 17 5
+%80 = OpAccessChain  %77  %64 %79
+OpStore %80 %78
+OpLine %3 18 5
+OpLine %3 18 34
+OpLine %3 18 23
+%82 = OpFAdd  %7  %25 %59
+OpLine %3 18 5
+%84 = OpAccessChain  %81  %64 %83
+OpStore %84 %82
+OpLine %3 19 5
+OpLine %3 19 23
+%86 = OpFAdd  %8  %28 %60
+OpLine %3 19 5
+%88 = OpAccessChain  %85  %64 %87
+OpStore %88 %86
+OpLine %3 20 5
+OpLine %3 20 34
+OpLine %3 20 23
+%90 = OpFAdd  %9  %31 %61
+OpLine %3 20 5
+%92 = OpAccessChain  %89  %64 %91
+OpStore %92 %90
+OpLine %3 21 5
+OpLine %3 21 23
+%94 = OpFAdd  %10  %34 %62
+OpLine %3 21 5
+%96 = OpAccessChain  %93  %64 %95
+OpStore %96 %94
+OpLine %3 22 5
+OpLine %3 22 34
+OpLine %3 22 23
+%98 = OpFAdd  %11  %37 %63
+OpLine %3 22 5
+%100 = OpAccessChain  %97  %64 %99
+OpStore %100 %98
+OpLine %3 1 1
+%101 = OpLoad  %12  %64
+%102 = OpCompositeExtract  %4  %101 0
+OpStore %38 %102
+%103 = OpCompositeExtract  %5  %101 1
+OpStore %40 %103
+%104 = OpCompositeExtract  %6  %101 2
+OpStore %42 %104
+%105 = OpCompositeExtract  %7  %101 3
+OpStore %44 %105
+%106 = OpCompositeExtract  %8  %101 4
+OpStore %46 %106
+%107 = OpCompositeExtract  %9  %101 5
+OpStore %48 %107
+%108 = OpCompositeExtract  %10  %101 6
+OpStore %50 %108
+%109 = OpCompositeExtract  %11  %101 7
+OpStore %52 %109
+OpReturn
+OpFunctionEnd
+%136 = OpFunction  %2  None %55
+%110 = OpLabel
+%137 = OpVariable  %65  Function %138
+%113 = OpLoad  %4  %112
+%115 = OpLoad  %5  %114
+%117 = OpLoad  %6  %116
+%119 = OpLoad  %7  %118
+%121 = OpLoad  %8  %120
+%123 = OpLoad  %9  %122
+%125 = OpLoad  %10  %124
+%127 = OpLoad  %11  %126
+%111 = OpCompositeConstruct  %12  %113 %115 %117 %119 %121 %123 %125 %127
+OpBranch %139
+%139 = OpLabel
+OpLine %3 40 5
+%140 = OpCompositeExtract  %4  %111 0
+OpLine %3 40 25
+%141 = OpFAdd  %4  %140 %56
+OpLine %3 40 5
+%142 = OpAccessChain  %68  %137 %70
+OpStore %142 %141
+OpLine %3 41 5
+%143 = OpCompositeExtract  %5  %111 1
+OpLine %3 41 25
+%144 = OpFAdd  %5  %143 %57
+OpLine %3 41 5
+%145 = OpAccessChain  %73  %137 %75
+OpStore %145 %144
+OpLine %3 42 5
+%146 = OpCompositeExtract  %6  %111 2
+OpLine %3 42 23
+%147 = OpFAdd  %6  %146 %58
+OpLine %3 42 5
+%148 = OpAccessChain  %77  %137 %79
+OpStore %148 %147
+OpLine %3 43 5
+%149 = OpCompositeExtract  %7  %111 3
+OpLine %3 43 40
+OpLine %3 43 23
+%150 = OpFAdd  %7  %149 %59
+OpLine %3 43 5
+%151 = OpAccessChain  %81  %137 %83
+OpStore %151 %150
+OpLine %3 44 5
+%152 = OpCompositeExtract  %8  %111 4
+OpLine %3 44 23
+%153 = OpFAdd  %8  %152 %60
+OpLine %3 44 5
+%154 = OpAccessChain  %85  %137 %87
+OpStore %154 %153
+OpLine %3 45 5
+%155 = OpCompositeExtract  %9  %111 5
+OpLine %3 45 40
+OpLine %3 45 23
+%156 = OpFAdd  %9  %155 %61
+OpLine %3 45 5
+%157 = OpAccessChain  %89  %137 %91
+OpStore %157 %156
+OpLine %3 46 5
+%158 = OpCompositeExtract  %10  %111 6
+OpLine %3 46 23
+%159 = OpFAdd  %10  %158 %62
+OpLine %3 46 5
+%160 = OpAccessChain  %93  %137 %95
+OpStore %160 %159
+OpLine %3 47 5
+%161 = OpCompositeExtract  %11  %111 7
+OpLine %3 47 40
+OpLine %3 47 23
+%162 = OpFAdd  %11  %161 %63
+OpLine %3 47 5
+%163 = OpAccessChain  %97  %137 %99
+OpStore %163 %162
+OpLine %3 1 1
+%164 = OpLoad  %12  %137
+%165 = OpCompositeExtract  %4  %164 0
+OpStore %128 %165
+%166 = OpCompositeExtract  %5  %164 1
+OpStore %129 %166
+%167 = OpCompositeExtract  %6  %164 2
+OpStore %130 %167
+%168 = OpCompositeExtract  %7  %164 3
+OpStore %131 %168
+%169 = OpCompositeExtract  %8  %164 4
+OpStore %132 %169
+%170 = OpCompositeExtract  %9  %164 5
+OpStore %133 %170
+%171 = OpCompositeExtract  %10  %164 6
+OpStore %134 %171
+%172 = OpCompositeExtract  %11  %164 7
+OpStore %135 %172
+OpReturn
+OpFunctionEnd
+%199 = OpFunction  %2  None %55
+%173 = OpLabel
+%200 = OpVariable  %65  Function %201
+%202 = OpVariable  %65  Function %203
+%176 = OpLoad  %4  %175
+%178 = OpLoad  %5  %177
+%180 = OpLoad  %6  %179
+%182 = OpLoad  %7  %181
+%184 = OpLoad  %8  %183
+%186 = OpLoad  %9  %185
+%188 = OpLoad  %10  %187
+%190 = OpLoad  %11  %189
+%174 = OpCompositeConstruct  %12  %176 %178 %180 %182 %184 %186 %188 %190
+OpBranch %204
+%204 = OpLabel
+OpLine %3 53 5
+OpStore %200 %174
+OpLine %3 55 5
+%205 = OpAccessChain  %68  %200 %70
+%206 = OpLoad  %4  %205
+OpLine %3 55 25
+%207 = OpFAdd  %4  %206 %56
+OpLine %3 55 5
+%208 = OpAccessChain  %68  %202 %70
+OpStore %208 %207
+OpLine %3 56 5
+%209 = OpAccessChain  %73  %200 %75
+%210 = OpLoad  %5  %209
+OpLine %3 56 25
+%211 = OpFAdd  %5  %210 %57
+OpLine %3 56 5
+%212 = OpAccessChain  %73  %202 %75
+OpStore %212 %211
+OpLine %3 57 5
+%213 = OpAccessChain  %77  %200 %79
+%214 = OpLoad  %6  %213
+OpLine %3 57 23
+%215 = OpFAdd  %6  %214 %58
+OpLine %3 57 5
+%216 = OpAccessChain  %77  %202 %79
+OpStore %216 %215
+OpLine %3 58 5
+%217 = OpAccessChain  %81  %200 %83
+%218 = OpLoad  %7  %217
+OpLine %3 58 40
+OpLine %3 58 23
+%219 = OpFAdd  %7  %218 %59
+OpLine %3 58 5
+%220 = OpAccessChain  %81  %202 %83
+OpStore %220 %219
+OpLine %3 59 5
+%221 = OpAccessChain  %85  %200 %87
+%222 = OpLoad  %8  %221
+OpLine %3 59 23
+%223 = OpFAdd  %8  %222 %60
+OpLine %3 59 5
+%224 = OpAccessChain  %85  %202 %87
+OpStore %224 %223
+OpLine %3 60 5
+%225 = OpAccessChain  %89  %200 %91
+%226 = OpLoad  %9  %225
+OpLine %3 60 40
+OpLine %3 60 23
+%227 = OpFAdd  %9  %226 %61
+OpLine %3 60 5
+%228 = OpAccessChain  %89  %202 %91
+OpStore %228 %227
+OpLine %3 61 5
+%229 = OpAccessChain  %93  %200 %95
+%230 = OpLoad  %10  %229
+OpLine %3 61 23
+%231 = OpFAdd  %10  %230 %62
+OpLine %3 61 5
+%232 = OpAccessChain  %93  %202 %95
+OpStore %232 %231
+OpLine %3 62 5
+%233 = OpAccessChain  %97  %200 %99
+%234 = OpLoad  %11  %233
+OpLine %3 62 40
+OpLine %3 62 23
+%235 = OpFAdd  %11  %234 %63
+OpLine %3 62 5
+%236 = OpAccessChain  %97  %202 %99
+OpStore %236 %235
+OpLine %3 1 1
+%237 = OpLoad  %12  %202
+%238 = OpCompositeExtract  %4  %237 0
+OpStore %191 %238
+%239 = OpCompositeExtract  %5  %237 1
+OpStore %192 %239
+%240 = OpCompositeExtract  %6  %237 2
+OpStore %193 %240
+%241 = OpCompositeExtract  %7  %237 3
+OpStore %194 %241
+%242 = OpCompositeExtract  %8  %237 4
+OpStore %195 %242
+%243 = OpCompositeExtract  %9  %237 5
+OpStore %196 %243
+%244 = OpCompositeExtract  %10  %237 6
+OpStore %197 %244
+%245 = OpCompositeExtract  %11  %237 7
+OpStore %198 %245
+OpReturn
+OpFunctionEnd
+%265 = OpFunction  %2  None %55
+%246 = OpLabel
+%267 = OpVariable  %65  Function %268
+%249 = OpLoad  %4  %248
+%251 = OpLoad  %5  %250
+%253 = OpLoad  %6  %252
+%255 = OpLoad  %7  %254
+%257 = OpLoad  %8  %256
+%259 = OpLoad  %9  %258
+%261 = OpLoad  %10  %260
+%263 = OpLoad  %11  %262
+%247 = OpCompositeConstruct  %12  %249 %251 %253 %255 %257 %259 %261 %263
+OpBranch %269
+%269 = OpLabel
+OpLine %3 68 5
+OpStore %267 %247
+OpLine %3 69 5
+OpLine %3 69 5
+%270 = OpAccessChain  %68  %267 %70
+OpStore %270 %266
+OpLine %3 70 12
+%271 = OpAccessChain  %68  %267 %70
+%272 = OpLoad  %4  %271
+OpStore %264 %272
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
new file mode 100644
index 00000000000..f3613b5f64c
--- /dev/null
+++ b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
@@ -0,0 +1,675 @@
+; SPIR-V
+; Version: 1.1
+; Generator: rspirv
+; Bound: 294
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40 %41 %43 %44 %46 %47 %49
+OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139
+OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210
+OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284
+OpExecutionMode %50 OriginUpperLeft
+OpExecutionMode %140 OriginUpperLeft
+OpExecutionMode %211 OriginUpperLeft
+OpExecutionMode %285 OriginUpperLeft
+%3 = OpString "f16-polyfill.wgsl"
+OpSource Unknown 0 %3 "enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
+"
+OpMemberName %12 0 "scalar_f16"
+OpMemberName %12 1 "scalar_f32"
+OpMemberName %12 2 "vec2_f16"
+OpMemberName %12 3 "vec2_f32"
+OpMemberName %12 4 "vec3_f16"
+OpMemberName %12 5 "vec3_f32"
+OpMemberName %12 6 "vec4_f16"
+OpMemberName %12 7 "vec4_f32"
+OpName %12 "F16IO"
+OpName %14 "scalar_f16"
+OpName %18 "scalar_f32"
+OpName %20 "vec2_f16"
+OpName %24 "vec2_f32"
+OpName %26 "vec3_f16"
+OpName %30 "vec3_f32"
+OpName %32 "vec4_f16"
+OpName %36 "vec4_f32"
+OpName %38 "scalar_f16"
+OpName %40 "scalar_f32"
+OpName %41 "vec2_f16"
+OpName %43 "vec2_f32"
+OpName %44 "vec3_f16"
+OpName %46 "vec3_f32"
+OpName %47 "vec4_f16"
+OpName %49 "vec4_f32"
+OpName %50 "test_direct"
+OpName %60 "output"
+OpName %112 "scalar_f16"
+OpName %115 "scalar_f32"
+OpName %117 "vec2_f16"
+OpName %120 "vec2_f32"
+OpName %122 "vec3_f16"
+OpName %125 "vec3_f32"
+OpName %127 "vec4_f16"
+OpName %130 "vec4_f32"
+OpName %132 "scalar_f16"
+OpName %133 "scalar_f32"
+OpName %134 "vec2_f16"
+OpName %135 "vec2_f32"
+OpName %136 "vec3_f16"
+OpName %137 "vec3_f32"
+OpName %138 "vec4_f16"
+OpName %139 "vec4_f32"
+OpName %140 "test_struct"
+OpName %141 "output"
+OpName %183 "scalar_f16"
+OpName %186 "scalar_f32"
+OpName %188 "vec2_f16"
+OpName %191 "vec2_f32"
+OpName %193 "vec3_f16"
+OpName %196 "vec3_f32"
+OpName %198 "vec4_f16"
+OpName %201 "vec4_f32"
+OpName %203 "scalar_f16"
+OpName %204 "scalar_f32"
+OpName %205 "vec2_f16"
+OpName %206 "vec2_f32"
+OpName %207 "vec3_f16"
+OpName %208 "vec3_f32"
+OpName %209 "vec4_f16"
+OpName %210 "vec4_f32"
+OpName %211 "test_copy_input"
+OpName %212 "input"
+OpName %214 "output"
+OpName %264 "scalar_f16"
+OpName %267 "scalar_f32"
+OpName %269 "vec2_f16"
+OpName %272 "vec2_f32"
+OpName %274 "vec3_f16"
+OpName %277 "vec3_f32"
+OpName %279 "vec4_f16"
+OpName %282 "vec4_f32"
+OpName %285 "test_return_partial"
+OpName %287 "input"
+OpMemberDecorate %12 0 Offset 0
+OpMemberDecorate %12 1 Offset 4
+OpMemberDecorate %12 2 Offset 8
+OpMemberDecorate %12 3 Offset 16
+OpMemberDecorate %12 4 Offset 24
+OpMemberDecorate %12 5 Offset 32
+OpMemberDecorate %12 6 Offset 48
+OpMemberDecorate %12 7 Offset 64
+OpDecorate %14 Location 0
+OpDecorate %18 Location 1
+OpDecorate %20 Location 2
+OpDecorate %24 Location 3
+OpDecorate %26 Location 4
+OpDecorate %30 Location 5
+OpDecorate %32 Location 6
+OpDecorate %36 Location 7
+OpDecorate %38 Location 0
+OpDecorate %40 Location 1
+OpDecorate %41 Location 2
+OpDecorate %43 Location 3
+OpDecorate %44 Location 4
+OpDecorate %46 Location 5
+OpDecorate %47 Location 6
+OpDecorate %49 Location 7
+OpDecorate %112 Location 0
+OpDecorate %115 Location 1
+OpDecorate %117 Location 2
+OpDecorate %120 Location 3
+OpDecorate %122 Location 4
+OpDecorate %125 Location 5
+OpDecorate %127 Location 6
+OpDecorate %130 Location 7
+OpDecorate %132 Location 0
+OpDecorate %133 Location 1
+OpDecorate %134 Location 2
+OpDecorate %135 Location 3
+OpDecorate %136 Location 4
+OpDecorate %137 Location 5
+OpDecorate %138 Location 6
+OpDecorate %139 Location 7
+OpDecorate %183 Location 0
+OpDecorate %186 Location 1
+OpDecorate %188 Location 2
+OpDecorate %191 Location 3
+OpDecorate %193 Location 4
+OpDecorate %196 Location 5
+OpDecorate %198 Location 6
+OpDecorate %201 Location 7
+OpDecorate %203 Location 0
+OpDecorate %204 Location 1
+OpDecorate %205 Location 2
+OpDecorate %206 Location 3
+OpDecorate %207 Location 4
+OpDecorate %208 Location 5
+OpDecorate %209 Location 6
+OpDecorate %210 Location 7
+OpDecorate %264 Location 0
+OpDecorate %267 Location 1
+OpDecorate %269 Location 2
+OpDecorate %272 Location 3
+OpDecorate %274 Location 4
+OpDecorate %277 Location 5
+OpDecorate %279 Location 6
+OpDecorate %282 Location 7
+OpDecorate %284 Location 0
+%2 = OpTypeVoid
+%4 = OpTypeFloat 16
+%5 = OpTypeFloat 32
+%6 = OpTypeVector %4 2
+%7 = OpTypeVector %5 2
+%8 = OpTypeVector %4 3
+%9 = OpTypeVector %5 3
+%10 = OpTypeVector %4 4
+%11 = OpTypeVector %5 4
+%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11
+%15 = OpTypePointer Input %5
+%14 = OpVariable  %15  Input
+%18 = OpVariable  %15  Input
+%21 = OpTypePointer Input %7
+%20 = OpVariable  %21  Input
+%24 = OpVariable  %21  Input
+%27 = OpTypePointer Input %9
+%26 = OpVariable  %27  Input
+%30 = OpVariable  %27  Input
+%33 = OpTypePointer Input %11
+%32 = OpVariable  %33  Input
+%36 = OpVariable  %33  Input
+%39 = OpTypePointer Output %5
+%38 = OpVariable  %39  Output
+%40 = OpVariable  %39  Output
+%42 = OpTypePointer Output %7
+%41 = OpVariable  %42  Output
+%43 = OpVariable  %42  Output
+%45 = OpTypePointer Output %9
+%44 = OpVariable  %45  Output
+%46 = OpVariable  %45  Output
+%48 = OpTypePointer Output %11
+%47 = OpVariable  %48  Output
+%49 = OpVariable  %48  Output
+%51 = OpTypeFunction %2
+%52 = OpConstant  %4  2.1524e-41
+%53 = OpConstant  %5  1.0
+%54 = OpConstantComposite  %6  %52 %52
+%55 = OpConstantComposite  %7  %53 %53
+%56 = OpConstantComposite  %8  %52 %52 %52
+%57 = OpConstantComposite  %9  %53 %53 %53
+%58 = OpConstantComposite  %10  %52 %52 %52 %52
+%59 = OpConstantComposite  %11  %53 %53 %53 %53
+%61 = OpTypePointer Function %12
+%62 = OpConstantNull  %12
+%64 = OpTypePointer Function %4
+%67 = OpTypeInt 32 0
+%66 = OpConstant  %67  0
+%69 = OpTypePointer Function %5
+%71 = OpConstant  %67  1
+%73 = OpTypePointer Function %6
+%75 = OpConstant  %67  2
+%77 = OpTypePointer Function %7
+%79 = OpConstant  %67  3
+%81 = OpTypePointer Function %8
+%83 = OpConstant  %67  4
+%85 = OpTypePointer Function %9
+%87 = OpConstant  %67  5
+%89 = OpTypePointer Function %10
+%91 = OpConstant  %67  6
+%93 = OpTypePointer Function %11
+%95 = OpConstant  %67  7
+%112 = OpVariable  %15  Input
+%115 = OpVariable  %15  Input
+%117 = OpVariable  %21  Input
+%120 = OpVariable  %21  Input
+%122 = OpVariable  %27  Input
+%125 = OpVariable  %27  Input
+%127 = OpVariable  %33  Input
+%130 = OpVariable  %33  Input
+%132 = OpVariable  %39  Output
+%133 = OpVariable  %39  Output
+%134 = OpVariable  %42  Output
+%135 = OpVariable  %42  Output
+%136 = OpVariable  %45  Output
+%137 = OpVariable  %45  Output
+%138 = OpVariable  %48  Output
+%139 = OpVariable  %48  Output
+%142 = OpConstantNull  %12
+%183 = OpVariable  %15  Input
+%186 = OpVariable  %15  Input
+%188 = OpVariable  %21  Input
+%191 = OpVariable  %21  Input
+%193 = OpVariable  %27  Input
+%196 = OpVariable  %27  Input
+%198 = OpVariable  %33  Input
+%201 = OpVariable  %33  Input
+%203 = OpVariable  %39  Output
+%204 = OpVariable  %39  Output
+%205 = OpVariable  %42  Output
+%206 = OpVariable  %42  Output
+%207 = OpVariable  %45  Output
+%208 = OpVariable  %45  Output
+%209 = OpVariable  %48  Output
+%210 = OpVariable  %48  Output
+%213 = OpConstantNull  %12
+%215 = OpConstantNull  %12
+%264 = OpVariable  %15  Input
+%267 = OpVariable  %15  Input
+%269 = OpVariable  %21  Input
+%272 = OpVariable  %21  Input
+%274 = OpVariable  %27  Input
+%277 = OpVariable  %27  Input
+%279 = OpVariable  %33  Input
+%282 = OpVariable  %33  Input
+%284 = OpVariable  %39  Output
+%286 = OpConstant  %4  0.0
+%288 = OpConstantNull  %12
+%50 = OpFunction  %2  None %51
+%13 = OpLabel
+%60 = OpVariable  %61  Function %62
+%16 = OpLoad  %5  %14
+%17 = OpFConvert  %4  %16
+%19 = OpLoad  %5  %18
+%22 = OpLoad  %7  %20
+%23 = OpFConvert  %6  %22
+%25 = OpLoad  %7  %24
+%28 = OpLoad  %9  %26
+%29 = OpFConvert  %8  %28
+%31 = OpLoad  %9  %30
+%34 = OpLoad  %11  %32
+%35 = OpFConvert  %10  %34
+%37 = OpLoad  %11  %36
+OpBranch %63
+%63 = OpLabel
+OpLine %3 15 5
+OpLine %3 15 25
+%65 = OpFAdd  %4  %17 %52
+OpLine %3 15 5
+%68 = OpAccessChain  %64  %60 %66
+OpStore %68 %65
+OpLine %3 16 5
+OpLine %3 16 25
+%70 = OpFAdd  %5  %19 %53
+OpLine %3 16 5
+%72 = OpAccessChain  %69  %60 %71
+OpStore %72 %70
+OpLine %3 17 5
+OpLine %3 17 23
+%74 = OpFAdd  %6  %23 %54
+OpLine %3 17 5
+%76 = OpAccessChain  %73  %60 %75
+OpStore %76 %74
+OpLine %3 18 5
+OpLine %3 18 34
+OpLine %3 18 23
+%78 = OpFAdd  %7  %25 %55
+OpLine %3 18 5
+%80 = OpAccessChain  %77  %60 %79
+OpStore %80 %78
+OpLine %3 19 5
+OpLine %3 19 23
+%82 = OpFAdd  %8  %29 %56
+OpLine %3 19 5
+%84 = OpAccessChain  %81  %60 %83
+OpStore %84 %82
+OpLine %3 20 5
+OpLine %3 20 34
+OpLine %3 20 23
+%86 = OpFAdd  %9  %31 %57
+OpLine %3 20 5
+%88 = OpAccessChain  %85  %60 %87
+OpStore %88 %86
+OpLine %3 21 5
+OpLine %3 21 23
+%90 = OpFAdd  %10  %35 %58
+OpLine %3 21 5
+%92 = OpAccessChain  %89  %60 %91
+OpStore %92 %90
+OpLine %3 22 5
+OpLine %3 22 34
+OpLine %3 22 23
+%94 = OpFAdd  %11  %37 %59
+OpLine %3 22 5
+%96 = OpAccessChain  %93  %60 %95
+OpStore %96 %94
+OpLine %3 1 1
+%97 = OpLoad  %12  %60
+%98 = OpCompositeExtract  %4  %97 0
+%99 = OpFConvert  %5  %98
+OpStore %38 %99
+%100 = OpCompositeExtract  %5  %97 1
+OpStore %40 %100
+%101 = OpCompositeExtract  %6  %97 2
+%102 = OpFConvert  %7  %101
+OpStore %41 %102
+%103 = OpCompositeExtract  %7  %97 3
+OpStore %43 %103
+%104 = OpCompositeExtract  %8  %97 4
+%105 = OpFConvert  %9  %104
+OpStore %44 %105
+%106 = OpCompositeExtract  %9  %97 5
+OpStore %46 %106
+%107 = OpCompositeExtract  %10  %97 6
+%108 = OpFConvert  %11  %107
+OpStore %47 %108
+%109 = OpCompositeExtract  %11  %97 7
+OpStore %49 %109
+OpReturn
+OpFunctionEnd
+%140 = OpFunction  %2  None %51
+%110 = OpLabel
+%141 = OpVariable  %61  Function %142
+%113 = OpLoad  %5  %112
+%114 = OpFConvert  %4  %113
+%116 = OpLoad  %5  %115
+%118 = OpLoad  %7  %117
+%119 = OpFConvert  %6  %118
+%121 = OpLoad  %7  %120
+%123 = OpLoad  %9  %122
+%124 = OpFConvert  %8  %123
+%126 = OpLoad  %9  %125
+%128 = OpLoad  %11  %127
+%129 = OpFConvert  %10  %128
+%131 = OpLoad  %11  %130
+%111 = OpCompositeConstruct  %12  %114 %116 %119 %121 %124 %126 %129 %131
+OpBranch %143
+%143 = OpLabel
+OpLine %3 40 5
+%144 = OpCompositeExtract  %4  %111 0
+OpLine %3 40 25
+%145 = OpFAdd  %4  %144 %52
+OpLine %3 40 5
+%146 = OpAccessChain  %64  %141 %66
+OpStore %146 %145
+OpLine %3 41 5
+%147 = OpCompositeExtract  %5  %111 1
+OpLine %3 41 25
+%148 = OpFAdd  %5  %147 %53
+OpLine %3 41 5
+%149 = OpAccessChain  %69  %141 %71
+OpStore %149 %148
+OpLine %3 42 5
+%150 = OpCompositeExtract  %6  %111 2
+OpLine %3 42 23
+%151 = OpFAdd  %6  %150 %54
+OpLine %3 42 5
+%152 = OpAccessChain  %73  %141 %75
+OpStore %152 %151
+OpLine %3 43 5
+%153 = OpCompositeExtract  %7  %111 3
+OpLine %3 43 40
+OpLine %3 43 23
+%154 = OpFAdd  %7  %153 %55
+OpLine %3 43 5
+%155 = OpAccessChain  %77  %141 %79
+OpStore %155 %154
+OpLine %3 44 5
+%156 = OpCompositeExtract  %8  %111 4
+OpLine %3 44 23
+%157 = OpFAdd  %8  %156 %56
+OpLine %3 44 5
+%158 = OpAccessChain  %81  %141 %83
+OpStore %158 %157
+OpLine %3 45 5
+%159 = OpCompositeExtract  %9  %111 5
+OpLine %3 45 40
+OpLine %3 45 23
+%160 = OpFAdd  %9  %159 %57
+OpLine %3 45 5
+%161 = OpAccessChain  %85  %141 %87
+OpStore %161 %160
+OpLine %3 46 5
+%162 = OpCompositeExtract  %10  %111 6
+OpLine %3 46 23
+%163 = OpFAdd  %10  %162 %58
+OpLine %3 46 5
+%164 = OpAccessChain  %89  %141 %91
+OpStore %164 %163
+OpLine %3 47 5
+%165 = OpCompositeExtract  %11  %111 7
+OpLine %3 47 40
+OpLine %3 47 23
+%166 = OpFAdd  %11  %165 %59
+OpLine %3 47 5
+%167 = OpAccessChain  %93  %141 %95
+OpStore %167 %166
+OpLine %3 1 1
+%168 = OpLoad  %12  %141
+%169 = OpCompositeExtract  %4  %168 0
+%170 = OpFConvert  %5  %169
+OpStore %132 %170
+%171 = OpCompositeExtract  %5  %168 1
+OpStore %133 %171
+%172 = OpCompositeExtract  %6  %168 2
+%173 = OpFConvert  %7  %172
+OpStore %134 %173
+%174 = OpCompositeExtract  %7  %168 3
+OpStore %135 %174
+%175 = OpCompositeExtract  %8  %168 4
+%176 = OpFConvert  %9  %175
+OpStore %136 %176
+%177 = OpCompositeExtract  %9  %168 5
+OpStore %137 %177
+%178 = OpCompositeExtract  %10  %168 6
+%179 = OpFConvert  %11  %178
+OpStore %138 %179
+%180 = OpCompositeExtract  %11  %168 7
+OpStore %139 %180
+OpReturn
+OpFunctionEnd
+%211 = OpFunction  %2  None %51
+%181 = OpLabel
+%212 = OpVariable  %61  Function %213
+%214 = OpVariable  %61  Function %215
+%184 = OpLoad  %5  %183
+%185 = OpFConvert  %4  %184
+%187 = OpLoad  %5  %186
+%189 = OpLoad  %7  %188
+%190 = OpFConvert  %6  %189
+%192 = OpLoad  %7  %191
+%194 = OpLoad  %9  %193
+%195 = OpFConvert  %8  %194
+%197 = OpLoad  %9  %196
+%199 = OpLoad  %11  %198
+%200 = OpFConvert  %10  %199
+%202 = OpLoad  %11  %201
+%182 = OpCompositeConstruct  %12  %185 %187 %190 %192 %195 %197 %200 %202
+OpBranch %216
+%216 = OpLabel
+OpLine %3 53 5
+OpStore %212 %182
+OpLine %3 55 5
+%217 = OpAccessChain  %64  %212 %66
+%218 = OpLoad  %4  %217
+OpLine %3 55 25
+%219 = OpFAdd  %4  %218 %52
+OpLine %3 55 5
+%220 = OpAccessChain  %64  %214 %66
+OpStore %220 %219
+OpLine %3 56 5
+%221 = OpAccessChain  %69  %212 %71
+%222 = OpLoad  %5  %221
+OpLine %3 56 25
+%223 = OpFAdd  %5  %222 %53
+OpLine %3 56 5
+%224 = OpAccessChain  %69  %214 %71
+OpStore %224 %223
+OpLine %3 57 5
+%225 = OpAccessChain  %73  %212 %75
+%226 = OpLoad  %6  %225
+OpLine %3 57 23
+%227 = OpFAdd  %6  %226 %54
+OpLine %3 57 5
+%228 = OpAccessChain  %73  %214 %75
+OpStore %228 %227
+OpLine %3 58 5
+%229 = OpAccessChain  %77  %212 %79
+%230 = OpLoad  %7  %229
+OpLine %3 58 40
+OpLine %3 58 23
+%231 = OpFAdd  %7  %230 %55
+OpLine %3 58 5
+%232 = OpAccessChain  %77  %214 %79
+OpStore %232 %231
+OpLine %3 59 5
+%233 = OpAccessChain  %81  %212 %83
+%234 = OpLoad  %8  %233
+OpLine %3 59 23
+%235 = OpFAdd  %8  %234 %56
+OpLine %3 59 5
+%236 = OpAccessChain  %81  %214 %83
+OpStore %236 %235
+OpLine %3 60 5
+%237 = OpAccessChain  %85  %212 %87
+%238 = OpLoad  %9  %237
+OpLine %3 60 40
+OpLine %3 60 23
+%239 = OpFAdd  %9  %238 %57
+OpLine %3 60 5
+%240 = OpAccessChain  %85  %214 %87
+OpStore %240 %239
+OpLine %3 61 5
+%241 = OpAccessChain  %89  %212 %91
+%242 = OpLoad  %10  %241
+OpLine %3 61 23
+%243 = OpFAdd  %10  %242 %58
+OpLine %3 61 5
+%244 = OpAccessChain  %89  %214 %91
+OpStore %244 %243
+OpLine %3 62 5
+%245 = OpAccessChain  %93  %212 %95
+%246 = OpLoad  %11  %245
+OpLine %3 62 40
+OpLine %3 62 23
+%247 = OpFAdd  %11  %246 %59
+OpLine %3 62 5
+%248 = OpAccessChain  %93  %214 %95
+OpStore %248 %247
+OpLine %3 1 1
+%249 = OpLoad  %12  %214
+%250 = OpCompositeExtract  %4  %249 0
+%251 = OpFConvert  %5  %250
+OpStore %203 %251
+%252 = OpCompositeExtract  %5  %249 1
+OpStore %204 %252
+%253 = OpCompositeExtract  %6  %249 2
+%254 = OpFConvert  %7  %253
+OpStore %205 %254
+%255 = OpCompositeExtract  %7  %249 3
+OpStore %206 %255
+%256 = OpCompositeExtract  %8  %249 4
+%257 = OpFConvert  %9  %256
+OpStore %207 %257
+%258 = OpCompositeExtract  %9  %249 5
+OpStore %208 %258
+%259 = OpCompositeExtract  %10  %249 6
+%260 = OpFConvert  %11  %259
+OpStore %209 %260
+%261 = OpCompositeExtract  %11  %249 7
+OpStore %210 %261
+OpReturn
+OpFunctionEnd
+%285 = OpFunction  %2  None %51
+%262 = OpLabel
+%287 = OpVariable  %61  Function %288
+%265 = OpLoad  %5  %264
+%266 = OpFConvert  %4  %265
+%268 = OpLoad  %5  %267
+%270 = OpLoad  %7  %269
+%271 = OpFConvert  %6  %270
+%273 = OpLoad  %7  %272
+%275 = OpLoad  %9  %274
+%276 = OpFConvert  %8  %275
+%278 = OpLoad  %9  %277
+%280 = OpLoad  %11  %279
+%281 = OpFConvert  %10  %280
+%283 = OpLoad  %11  %282
+%263 = OpCompositeConstruct  %12  %266 %268 %271 %273 %276 %278 %281 %283
+OpBranch %289
+%289 = OpLabel
+OpLine %3 68 5
+OpStore %287 %263
+OpLine %3 69 5
+OpLine %3 69 5
+%290 = OpAccessChain  %64  %287 %66
+OpStore %290 %286
+OpLine %3 70 12
+%291 = OpAccessChain  %64  %287 %66
+%292 = OpLoad  %4  %291
+%293 = OpFConvert  %5  %292
+OpStore %284 %293
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index b429f2314dc..6667c71561e 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -226,7 +226,7 @@ impl PhysicalDeviceFeatures {
     /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions
     fn from_extensions_and_requested_features(
         phd_capabilities: &PhysicalDeviceProperties,
-        _phd_features: &PhysicalDeviceFeatures,
+        phd_features: &PhysicalDeviceFeatures,
         enabled_extensions: &[&'static CStr],
         requested_features: wgt::Features,
         downlevel_flags: wgt::DownlevelFlags,
@@ -396,10 +396,17 @@ impl PhysicalDeviceFeatures {
                 _ => None,
             },
             _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) {
+                // Check if the device actually supports storage_input_output16
+                let storage_input_output16_supported = phd_features
+                    ._16bit_storage
+                    .as_ref()
+                    .map(|features| features.storage_input_output16 != 0)
+                    .unwrap_or(false);
+
                 Some(
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
-                        .storage_input_output16(true)
+                        .storage_input_output16(storage_input_output16_supported)
                         .uniform_and_storage_buffer16_bit_access(true),
                 )
             } else {
@@ -736,12 +743,12 @@ impl PhysicalDeviceFeatures {
 
         if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage)
         {
+            // Note: storage_input_output16 is not required; we polyfill f16 I/O using f32 types when this capability is not available.
             features.set(
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
-                    && bit16.uniform_and_storage_buffer16_bit_access != 0
-                    && bit16.storage_input_output16 != 0,
+                    && bit16.uniform_and_storage_buffer16_bit_access != 0,
             );
         }
 
@@ -2115,6 +2122,15 @@ impl super::Adapter {
                     spv::ZeroInitializeWorkgroupMemoryMode::Polyfill
                 },
                 force_loop_bounding: true,
+                use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) && {
+                    // Check if the device actually supports storage_input_output16
+                    let phd_features = self.physical_device_features(enabled_extensions, features);
+                    phd_features
+                        ._16bit_storage
+                        .as_ref()
+                        .map(|storage_features| storage_features.storage_input_output16 != 0)
+                        .unwrap_or(false)
+                },
                 // We need to build this separately for each invocation, so just default it out here
                 binding_map: BTreeMap::default(),
                 debug_info: None,

From 5694706a01d7d4ac834d0c18dd48cb893590ecad Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sat, 5 Jul 2025 08:59:34 -0400
Subject: [PATCH 04/20] Cleanup

---
 naga/src/back/spv/polyfill.rs | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/naga/src/back/spv/polyfill.rs b/naga/src/back/spv/polyfill.rs
index 9c50444620d..abcdb2092f2 100644
--- a/naga/src/back/spv/polyfill.rs
+++ b/naga/src/back/spv/polyfill.rs
@@ -9,7 +9,7 @@ It works by:
 */
 
 use crate::back::spv::{Instruction, LocalType, NumericType, Word};
-use std::vec::Vec;
+use alloc::vec::Vec;
 
 /// Manages f16 I/O polyfill state and operations.
 #[derive(Default)]
@@ -94,10 +94,6 @@ impl F16IoPolyfill {
             _ => None,
         }
     }
-
-    pub fn clear(&mut self) {
-        self.variable_map.clear();
-    }
 }
 
 impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {

From 1fc98ffccf0a527bce6db239bd66c8f680f337f2 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sat, 5 Jul 2025 09:23:34 -0400
Subject: [PATCH 05/20] Rename file

---
 naga/src/back/spv/block.rs                         | 4 ++--
 naga/src/back/spv/{polyfill.rs => f16_polyfill.rs} | 0
 naga/src/back/spv/mod.rs                           | 4 ++--
 naga/src/back/spv/writer.rs                        | 8 ++++----
 4 files changed, 8 insertions(+), 8 deletions(-)
 rename naga/src/back/spv/{polyfill.rs => f16_polyfill.rs} (100%)

diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 5de6e91c616..5114b348546 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -239,7 +239,7 @@ impl Writer {
 
             if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
                 let converted = self.id_gen.next();
-                super::polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
+                super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
                     member_value_id,
                     f32_ty,
                     converted,
@@ -2332,7 +2332,7 @@ impl BlockContext<'_> {
                         .body
                         .push(Instruction::load(f32_ty, id, pointer_id, None));
                     let converted = self.gen_id();
-                    super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                    super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
                         id,
                         result_type_id,
                         converted,
diff --git a/naga/src/back/spv/polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
similarity index 100%
rename from naga/src/back/spv/polyfill.rs
rename to naga/src/back/spv/f16_polyfill.rs
diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs
index 5a37044fe47..8ac85978fbc 100644
--- a/naga/src/back/spv/mod.rs
+++ b/naga/src/back/spv/mod.rs
@@ -10,7 +10,7 @@ mod image;
 mod index;
 mod instructions;
 mod layout;
-mod polyfill;
+mod f16_polyfill;
 mod ray;
 mod recyclable;
 mod selection;
@@ -775,7 +775,7 @@ pub struct Writer {
 
     /// F16 I/O polyfill manager for handling f16 input/output variables
     /// when StorageInputOutput16 capability is not available.
-    io_f16_polyfills: polyfill::F16IoPolyfill,
+    io_f16_polyfills: f16_polyfill::F16IoPolyfill,
 }
 
 bitflags::bitflags! {
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index b9c530fbf8f..510f841dc61 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -93,7 +93,7 @@ impl Writer {
             temp_list: Vec::new(),
             ray_get_committed_intersection_function: None,
             ray_get_candidate_intersection_function: None,
-            io_f16_polyfills: super::polyfill::F16IoPolyfill::new(
+            io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new(
                 options.use_storage_input_output_16,
             ),
         })
@@ -739,7 +739,7 @@ impl Writer {
                             .body
                             .push(Instruction::load(f32_ty, id, varying_id, None));
                         let converted = self.id_gen.next();
-                        super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                        super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
                             id,
                             argument_type_id,
                             converted,
@@ -786,7 +786,7 @@ impl Writer {
                                 .body
                                 .push(Instruction::load(f32_ty, id, varying_id, None));
                             let converted = self.id_gen.next();
-                            super::polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                            super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
                                 id,
                                 type_id,
                                 converted,
@@ -1953,7 +1953,7 @@ impl Writer {
         let needs_polyfill = self.needs_f16_polyfill(ty_inner);
 
         let pointer_type_id = if needs_polyfill {
-            let f32_value_local = super::polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
+            let f32_value_local = super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
                 .expect("needs_polyfill returned true but create_polyfill_type returned None");
 
             let f32_type_id = self.get_localtype_id(f32_value_local);

From 5201714279937bb129b28d131e4d97ed3e5701ad Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sat, 5 Jul 2025 09:24:47 -0400
Subject: [PATCH 06/20] fmt

---
 naga/src/back/spv/mod.rs    | 2 +-
 naga/src/back/spv/writer.rs | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs
index 8ac85978fbc..87d201ef212 100644
--- a/naga/src/back/spv/mod.rs
+++ b/naga/src/back/spv/mod.rs
@@ -5,12 +5,12 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation).
 */
 
 mod block;
+mod f16_polyfill;
 mod helpers;
 mod image;
 mod index;
 mod instructions;
 mod layout;
-mod f16_polyfill;
 mod ray;
 mod recyclable;
 mod selection;
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index 510f841dc61..53ac4051853 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -1953,8 +1953,9 @@ impl Writer {
         let needs_polyfill = self.needs_f16_polyfill(ty_inner);
 
         let pointer_type_id = if needs_polyfill {
-            let f32_value_local = super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
-                .expect("needs_polyfill returned true but create_polyfill_type returned None");
+            let f32_value_local =
+                super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
+                    .expect("needs_polyfill returned true but create_polyfill_type returned None");
 
             let f32_type_id = self.get_localtype_id(f32_value_local);
             let ptr_id = self.get_pointer_type_id(f32_type_id, class);

From 44c88df7cf83fa0ccd1acdbbb1c9696c077528ec Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sat, 5 Jul 2025 14:22:41 -0400
Subject: [PATCH 07/20] Changelog entry

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 946473f8127..07a87b7c3ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,7 @@ Bottom level categories:
 - Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643).
 - Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683).
 - Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658)
+- Add f16 IO polyfill on Vulkan backend to enable `SHADER_F16` use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884).
 
 #### General
 

From 2151560851632082426b99c1d4d9a574b53e1614 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sat, 5 Jul 2025 14:22:41 -0400
Subject: [PATCH 08/20] [naga spv-out] Add f16 io polyfill

---
 CHANGELOG.md                                |   1 +
 naga/src/back/spv/block.rs                  |  30 +-
 naga/src/back/spv/f16_polyfill.rs           | 104 +++
 naga/src/back/spv/mod.rs                    |  11 +
 naga/src/back/spv/writer.rs                 |  93 ++-
 naga/tests/in/wgsl/f16-native.toml          |  13 +
 naga/tests/in/wgsl/f16-native.wgsl          |  71 ++
 naga/tests/in/wgsl/f16-polyfill.toml        |  13 +
 naga/tests/in/wgsl/f16-polyfill.wgsl        |  71 ++
 naga/tests/naga/snapshots.rs                |  21 +-
 naga/tests/naga/spirv_capabilities.rs       | 135 ++++
 naga/tests/out/spv/wgsl-f16-native.spvasm   | 655 +++++++++++++++++++
 naga/tests/out/spv/wgsl-f16-polyfill.spvasm | 675 ++++++++++++++++++++
 wgpu-hal/src/vulkan/adapter.rs              |  24 +-
 14 files changed, 1898 insertions(+), 19 deletions(-)
 create mode 100644 naga/src/back/spv/f16_polyfill.rs
 create mode 100644 naga/tests/in/wgsl/f16-native.toml
 create mode 100644 naga/tests/in/wgsl/f16-native.wgsl
 create mode 100644 naga/tests/in/wgsl/f16-polyfill.toml
 create mode 100644 naga/tests/in/wgsl/f16-polyfill.wgsl
 create mode 100644 naga/tests/out/spv/wgsl-f16-native.spvasm
 create mode 100644 naga/tests/out/spv/wgsl-f16-polyfill.spvasm

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c27699365fb..febf490551b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -130,6 +130,7 @@ By @Vecvec in [#7829](https://github.com/gfx-rs/wgpu/pull/7829).
 - Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643).
 - Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683).
 - Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658)
+- Add f16 IO polyfill on Vulkan backend to enable `SHADER_F16` use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884).
 
 ### General
 
diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 7ec659e1d90..5114b348546 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -237,7 +237,18 @@ impl Writer {
                 }
             };
 
-            body.push(Instruction::store(res_member.id, member_value_id, None));
+            if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
+                let converted = self.id_gen.next();
+                super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
+                    member_value_id,
+                    f32_ty,
+                    converted,
+                    body,
+                );
+                body.push(Instruction::store(res_member.id, converted, None));
+            } else {
+                body.push(Instruction::store(res_member.id, member_value_id, None));
+            }
 
             match res_member.built_in {
                 Some(crate::BuiltIn::Position { .. })
@@ -2313,6 +2324,23 @@ impl BlockContext<'_> {
         match self.write_access_chain(pointer, block, access_type_adjustment)? {
             ExpressionPointer::Ready { pointer_id } => {
                 let id = self.gen_id();
+
+                if let Some((f32_ty, _)) =
+                    self.writer.io_f16_polyfills.get_polyfill_info(pointer_id)
+                {
+                    block
+                        .body
+                        .push(Instruction::load(f32_ty, id, pointer_id, None));
+                    let converted = self.gen_id();
+                    super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                        id,
+                        result_type_id,
+                        converted,
+                        &mut block.body,
+                    );
+                    return Ok(converted);
+                }
+
                 let atomic_space =
                     match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) {
                         crate::TypeInner::Pointer { base, space } => {
diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
new file mode 100644
index 00000000000..abcdb2092f2
--- /dev/null
+++ b/naga/src/back/spv/f16_polyfill.rs
@@ -0,0 +1,104 @@
+/*!
+This module provides functionality to polyfill f16 input/output variables
+when the StorageInputOutput16 capability is not available or disabled.
+
+It works by:
+1. Declaring f16 I/O variables as f32 in SPIR-V
+2. Converting between f16 and f32 at runtime using OpFConvert
+3. Maintaining mappings to track which variables need conversion
+*/
+
+use crate::back::spv::{Instruction, LocalType, NumericType, Word};
+use alloc::vec::Vec;
+
+/// Manages f16 I/O polyfill state and operations.
+#[derive(Default)]
+pub(super) struct F16IoPolyfill {
+    use_native: bool,
+    variable_map: crate::FastHashMap<Word, (Word, Word)>,
+}
+
+impl F16IoPolyfill {
+    pub fn new(use_storage_input_output_16: bool) -> Self {
+        Self {
+            use_native: use_storage_input_output_16,
+            variable_map: crate::FastHashMap::default(),
+        }
+    }
+
+    pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
+        use crate::{ScalarKind as Sk, TypeInner};
+
+        !self.use_native
+            && match *ty_inner {
+                TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => true,
+                TypeInner::Vector { scalar, .. }
+                    if scalar.kind == Sk::Float && scalar.width == 2 =>
+                {
+                    true
+                }
+                _ => false,
+            }
+    }
+
+    pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) {
+        self.variable_map
+            .insert(variable_id, (f32_type_id, f16_type_id));
+    }
+
+    pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> {
+        self.variable_map.get(&variable_id).copied()
+    }
+
+    pub fn emit_f16_to_f32_conversion(
+        f16_value_id: Word,
+        f32_type_id: Word,
+        converted_id: Word,
+        body: &mut Vec<Instruction>,
+    ) {
+        body.push(Instruction::unary(
+            spirv::Op::FConvert,
+            f32_type_id,
+            converted_id,
+            f16_value_id,
+        ));
+    }
+
+    pub fn emit_f32_to_f16_conversion(
+        f32_value_id: Word,
+        f16_type_id: Word,
+        converted_id: Word,
+        body: &mut Vec<Instruction>,
+    ) {
+        body.push(Instruction::unary(
+            spirv::Op::FConvert,
+            f16_type_id,
+            converted_id,
+            f32_value_id,
+        ));
+    }
+
+    pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option<LocalType> {
+        use crate::{ScalarKind as Sk, TypeInner};
+
+        match *ty_inner {
+            TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => {
+                Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32)))
+            }
+            TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => {
+                Some(LocalType::Numeric(NumericType::Vector {
+                    size,
+                    scalar: crate::Scalar::F32,
+                }))
+            }
+            _ => None,
+        }
+    }
+}
+
+impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
+    fn recycle(mut self) -> Self {
+        self.variable_map = self.variable_map.recycle();
+        self
+    }
+}
diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs
index 2dcd95957d7..87d201ef212 100644
--- a/naga/src/back/spv/mod.rs
+++ b/naga/src/back/spv/mod.rs
@@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation).
 */
 
 mod block;
+mod f16_polyfill;
 mod helpers;
 mod image;
 mod index;
@@ -744,6 +745,7 @@ pub struct Writer {
     bounds_check_policies: BoundsCheckPolicies,
     zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode,
     force_loop_bounding: bool,
+    use_storage_input_output_16: bool,
     void_type: Word,
     //TODO: convert most of these into vectors, addressable by handle indices
     lookup_type: crate::FastHashMap<LookupType, Word>,
@@ -770,6 +772,10 @@ pub struct Writer {
 
     ray_get_committed_intersection_function: Option<Word>,
     ray_get_candidate_intersection_function: Option<Word>,
+
+    /// F16 I/O polyfill manager for handling f16 input/output variables
+    /// when StorageInputOutput16 capability is not available.
+    io_f16_polyfills: f16_polyfill::F16IoPolyfill,
 }
 
 bitflags::bitflags! {
@@ -852,6 +858,10 @@ pub struct Options<'a> {
     /// to think the number of iterations is bounded.
     pub force_loop_bounding: bool,
 
+    /// Whether to use the StorageInputOutput16 capability for f16 shader I/O.
+    /// When false, f16 I/O is polyfilled using f32 types with conversions.
+    pub use_storage_input_output_16: bool,
+
     pub debug_info: Option<DebugInfo<'a>>,
 }
 
@@ -871,6 +881,7 @@ impl Default for Options<'_> {
             bounds_check_policies: BoundsCheckPolicies::default(),
             zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill,
             force_loop_bounding: true,
+            use_storage_input_output_16: true,
             debug_info: None,
         }
     }
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index b61747c8326..53ac4051853 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -78,6 +78,7 @@ impl Writer {
             bounds_check_policies: options.bounds_check_policies,
             zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory,
             force_loop_bounding: options.force_loop_bounding,
+            use_storage_input_output_16: options.use_storage_input_output_16,
             void_type,
             lookup_type: crate::FastHashMap::default(),
             lookup_function: crate::FastHashMap::default(),
@@ -92,6 +93,9 @@ impl Writer {
             temp_list: Vec::new(),
             ray_get_committed_intersection_function: None,
             ray_get_candidate_intersection_function: None,
+            io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new(
+                options.use_storage_input_output_16,
+            ),
         })
     }
 
@@ -125,6 +129,7 @@ impl Writer {
             bounds_check_policies: self.bounds_check_policies,
             zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
             force_loop_bounding: self.force_loop_bounding,
+            use_storage_input_output_16: self.use_storage_input_output_16,
             capabilities_available: take(&mut self.capabilities_available),
             binding_map: take(&mut self.binding_map),
 
@@ -151,6 +156,7 @@ impl Writer {
             temp_list: take(&mut self.temp_list).recycle(),
             ray_get_candidate_intersection_function: None,
             ray_get_committed_intersection_function: None,
+            io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(),
         };
 
         *self = fresh;
@@ -726,10 +732,28 @@ impl Writer {
                         binding,
                     )?;
                     iface.varying_ids.push(varying_id);
-                    let id = self.id_gen.next();
-                    prelude
-                        .body
-                        .push(Instruction::load(argument_type_id, id, varying_id, None));
+                    let mut id = self.id_gen.next();
+
+                    if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
+                        prelude
+                            .body
+                            .push(Instruction::load(f32_ty, id, varying_id, None));
+                        let converted = self.id_gen.next();
+                        super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                            id,
+                            argument_type_id,
+                            converted,
+                            &mut prelude.body,
+                        );
+                        id = converted;
+                    } else {
+                        prelude.body.push(Instruction::load(
+                            argument_type_id,
+                            id,
+                            varying_id,
+                            None,
+                        ));
+                    }
 
                     if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
                         local_invocation_id = Some(id);
@@ -755,10 +779,26 @@ impl Writer {
                         )?;
                         iface.varying_ids.push(varying_id);
                         let id = self.id_gen.next();
-                        prelude
-                            .body
-                            .push(Instruction::load(type_id, id, varying_id, None));
-                        constituent_ids.push(id);
+                        if let Some((f32_ty, _)) =
+                            self.io_f16_polyfills.get_polyfill_info(varying_id)
+                        {
+                            prelude
+                                .body
+                                .push(Instruction::load(f32_ty, id, varying_id, None));
+                            let converted = self.id_gen.next();
+                            super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                                id,
+                                type_id,
+                                converted,
+                                &mut prelude.body,
+                            );
+                            constituent_ids.push(converted);
+                        } else {
+                            prelude
+                                .body
+                                .push(Instruction::load(type_id, id, varying_id, None));
+                            constituent_ids.push(id);
+                        }
 
                         if binding == &crate::Binding::BuiltIn(crate::BuiltIn::GlobalInvocationId) {
                             local_invocation_id = Some(id);
@@ -1220,8 +1260,10 @@ impl Writer {
                         .insert(spirv::Capability::StorageBuffer16BitAccess);
                     self.capabilities_used
                         .insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
-                    self.capabilities_used
-                        .insert(spirv::Capability::StorageInputOutput16);
+                    if self.use_storage_input_output_16 {
+                        self.capabilities_used
+                            .insert(spirv::Capability::StorageInputOutput16);
+                    }
                 }
                 Instruction::type_float(id, bits)
             }
@@ -1904,8 +1946,28 @@ impl Writer {
         ty: Handle<crate::Type>,
         binding: &crate::Binding,
     ) -> Result<Word, Error> {
+        use crate::TypeInner;
+
         let id = self.id_gen.next();
-        let pointer_type_id = self.get_handle_pointer_type_id(ty, class);
+        let ty_inner = &ir_module.types[ty].inner;
+        let needs_polyfill = self.needs_f16_polyfill(ty_inner);
+
+        let pointer_type_id = if needs_polyfill {
+            let f32_value_local =
+                super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
+                    .expect("needs_polyfill returned true but create_polyfill_type returned None");
+
+            let f32_type_id = self.get_localtype_id(f32_value_local);
+            let ptr_id = self.get_pointer_type_id(f32_type_id, class);
+            let f16_type_id = self.get_handle_type_id(ty);
+            self.io_f16_polyfills
+                .register_variable(id, f32_type_id, f16_type_id);
+
+            ptr_id
+        } else {
+            self.get_handle_pointer_type_id(ty, class)
+        };
+
         Instruction::variable(pointer_type_id, id, class, None)
             .to_words(&mut self.logical_layout.declarations);
 
@@ -2088,8 +2150,9 @@ impl Writer {
                 // > shader, must be decorated Flat
                 if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment {
                     let is_flat = match ir_module.types[ty].inner {
-                        crate::TypeInner::Scalar(scalar)
-                        | crate::TypeInner::Vector { scalar, .. } => match scalar.kind {
+                        TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar
+                            .kind
+                        {
                             Sk::Uint | Sk::Sint | Sk::Bool => true,
                             Sk::Float => false,
                             Sk::AbstractInt | Sk::AbstractFloat => {
@@ -2584,6 +2647,10 @@ impl Writer {
         self.decorate(id, spirv::Decoration::NonUniform, &[]);
         Ok(())
     }
+
+    pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
+        self.io_f16_polyfills.needs_polyfill(ty_inner)
+    }
 }
 
 #[test]
diff --git a/naga/tests/in/wgsl/f16-native.toml b/naga/tests/in/wgsl/f16-native.toml
new file mode 100644
index 00000000000..529d34f80da
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-native.toml
@@ -0,0 +1,13 @@
+targets = "SPIRV"
+god_mode = true
+
+[spv]
+debug = true
+version = [1, 1]
+use_storage_input_output_16 = true
+capabilities = ["Float16"]
+
+[bounds_check_policies]
+index = "ReadZeroSkipWrite"
+buffer = "ReadZeroSkipWrite"
+image = "ReadZeroSkipWrite"
diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl
new file mode 100644
index 00000000000..2dea0baaa29
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-native.wgsl
@@ -0,0 +1,71 @@
+enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
diff --git a/naga/tests/in/wgsl/f16-polyfill.toml b/naga/tests/in/wgsl/f16-polyfill.toml
new file mode 100644
index 00000000000..96160063e05
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-polyfill.toml
@@ -0,0 +1,13 @@
+targets = "SPIRV"
+god_mode = true
+
+[spv]
+debug = true
+version = [1, 1]
+use_storage_input_output_16 = false
+capabilities = ["Float16"]
+
+[bounds_check_policies]
+index = "ReadZeroSkipWrite"
+buffer = "ReadZeroSkipWrite"
+image = "ReadZeroSkipWrite"
diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl
new file mode 100644
index 00000000000..2dea0baaa29
--- /dev/null
+++ b/naga/tests/in/wgsl/f16-polyfill.wgsl
@@ -0,0 +1,71 @@
+enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
diff --git a/naga/tests/naga/snapshots.rs b/naga/tests/naga/snapshots.rs
index 32e2f5e0285..b00ef579203 100644
--- a/naga/tests/naga/snapshots.rs
+++ b/naga/tests/naga/snapshots.rs
@@ -91,7 +91,7 @@ struct SpirvInParameters {
     adjust_coordinate_space: bool,
 }
 
-#[derive(Default, serde::Deserialize)]
+#[derive(serde::Deserialize)]
 #[serde(default)]
 struct SpirvOutParameters {
     version: SpvOutVersion,
@@ -101,11 +101,29 @@ struct SpirvOutParameters {
     force_point_size: bool,
     clamp_frag_depth: bool,
     separate_entry_points: bool,
+    use_storage_input_output_16: bool,
     #[cfg(all(feature = "deserialize", spv_out))]
     #[serde(deserialize_with = "deserialize_binding_map")]
     binding_map: naga::back::spv::BindingMap,
 }
 
+impl Default for SpirvOutParameters {
+    fn default() -> Self {
+        Self {
+            version: SpvOutVersion::default(),
+            capabilities: naga::FastHashSet::default(),
+            debug: false,
+            adjust_coordinate_space: false,
+            force_point_size: false,
+            clamp_frag_depth: false,
+            separate_entry_points: false,
+            use_storage_input_output_16: true,
+            #[cfg(all(feature = "deserialize", spv_out))]
+            binding_map: naga::back::spv::BindingMap::default(),
+        }
+    }
+}
+
 #[derive(Default, serde::Deserialize)]
 #[serde(default)]
 struct WgslOutParameters {
@@ -617,6 +635,7 @@ fn write_output_spv(
         binding_map: params.binding_map.clone(),
         zero_initialize_workgroup_memory: spv::ZeroInitializeWorkgroupMemoryMode::Polyfill,
         force_loop_bounding: true,
+        use_storage_input_output_16: params.use_storage_input_output_16,
         debug_info,
     };
 
diff --git a/naga/tests/naga/spirv_capabilities.rs b/naga/tests/naga/spirv_capabilities.rs
index 2d46e37f72d..aa99298273d 100644
--- a/naga/tests/naga/spirv_capabilities.rs
+++ b/naga/tests/naga/spirv_capabilities.rs
@@ -6,6 +6,9 @@ Test SPIR-V backend capability checks.
 
 use spirv::Capability as Ca;
 
+#[cfg(spv_out)]
+use rspirv::binary::Disassemble;
+
 fn capabilities_used(source: &str) -> naga::FastIndexSet<Ca> {
     use naga::back::spv;
     use naga::valid;
@@ -213,3 +216,135 @@ fn int64() {
 fn float16() {
     require(&[Ca::Float16], "enable f16; fn f(x: f16) { }");
 }
+
+#[test]
+fn f16_io_capabilities() {
+    let source = r#"
+        enable f16;
+        
+        struct VertexOutput {
+            @location(0) color: vec3<f16>,
+        }
+        
+        @fragment  
+        fn main(input: VertexOutput) -> @location(0) vec4<f16> {
+            return vec4<f16>(input.color, f16(1.0));
+        }
+    "#;
+
+    use naga::back::spv;
+    use naga::valid;
+
+    let module = naga::front::wgsl::parse_str(source).unwrap();
+    let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all())
+        .validate(&module)
+        .unwrap();
+
+    // Test native path: use_storage_input_output_16 = true
+    let options_native = spv::Options {
+        use_storage_input_output_16: true,
+        ..Default::default()
+    };
+
+    let mut words_native = vec![];
+    let mut writer_native = spv::Writer::new(&options_native).unwrap();
+    writer_native
+        .write(&module, &info, None, &None, &mut words_native)
+        .unwrap();
+    let caps_native = writer_native.get_capabilities_used();
+
+    // Should include StorageInputOutput16 for native f16 I/O
+    assert!(caps_native.contains(&Ca::StorageInputOutput16));
+
+    // Test polyfill path: use_storage_input_output_16 = false
+    let options_polyfill = spv::Options {
+        use_storage_input_output_16: false,
+        ..Default::default()
+    };
+
+    let mut words_polyfill = vec![];
+    let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap();
+    writer_polyfill
+        .write(&module, &info, None, &None, &mut words_polyfill)
+        .unwrap();
+    let caps_polyfill = writer_polyfill.get_capabilities_used();
+
+    // Should not include StorageInputOutput16 when polyfilled
+    assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16));
+
+    // But should still include the basic f16 capabilities
+    assert!(caps_polyfill.contains(&Ca::Float16));
+}
+
+#[cfg(spv_out)]
+#[test]
+fn f16_io_polyfill_codegen() {
+    let source = r#"
+        enable f16;
+
+        struct F16IO {
+            @location(0) scalar_f16: f16,
+            @location(1) scalar_f32: f32,
+            @location(2) vec2_f16: vec2<f16>,
+            @location(3) vec2_f32: vec2<f32>,
+        }
+
+        @fragment
+        fn main(input: F16IO) -> F16IO {
+            var output = input;
+            output.scalar_f16 = input.scalar_f16 + 1.0h;
+            output.vec2_f16.x = input.vec2_f16.y;
+            return output;
+        }
+    "#;
+
+    use naga::{back::spv, valid};
+
+    let module = naga::front::wgsl::parse_str(source).unwrap();
+    let info = valid::Validator::new(valid::ValidationFlags::all(), valid::Capabilities::all())
+        .validate(&module)
+        .unwrap();
+
+    // Test Native Path
+    let options_native = spv::Options {
+        use_storage_input_output_16: true,
+        ..Default::default()
+    };
+    let mut words_native = vec![];
+    let mut writer_native = spv::Writer::new(&options_native).unwrap();
+    writer_native
+        .write(&module, &info, None, &None, &mut words_native)
+        .unwrap();
+    let caps_native = writer_native.get_capabilities_used();
+    let dis_native = rspirv::dr::load_words(words_native).unwrap().disassemble();
+
+    // Native path must request the capability and must NOT have conversions.
+    assert!(caps_native.contains(&Ca::StorageInputOutput16));
+    assert!(!dis_native.contains("OpFConvert"));
+
+    // Test Polyfill Path
+    let options_polyfill = spv::Options {
+        use_storage_input_output_16: false,
+        ..Default::default()
+    };
+    let mut words_polyfill = vec![];
+    let mut writer_polyfill = spv::Writer::new(&options_polyfill).unwrap();
+    writer_polyfill
+        .write(&module, &info, None, &None, &mut words_polyfill)
+        .unwrap();
+    let caps_polyfill = writer_polyfill.get_capabilities_used();
+    let dis_polyfill = rspirv::dr::load_words(words_polyfill)
+        .unwrap()
+        .disassemble();
+
+    // Polyfill path must NOT request the capability and must have conversions.
+    assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16));
+    assert!(dis_polyfill.contains("OpFConvert"));
+
+    // Should have 2 input conversions, and 2 output conversions
+    let fconvert_count = dis_polyfill.matches("OpFConvert").count();
+    assert_eq!(
+        fconvert_count, 4,
+        "Expected 4 OpFConvert instructions for polyfilled I/O"
+    );
+}
diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm
new file mode 100644
index 00000000000..78f1b0d9b58
--- /dev/null
+++ b/naga/tests/out/spv/wgsl-f16-native.spvasm
@@ -0,0 +1,655 @@
+; SPIR-V
+; Version: 1.1
+; Generator: rspirv
+; Bound: 273
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpCapability StorageInputOutput16
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40 %42 %44 %46 %48 %50 %52
+OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135
+OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198
+OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254 %256 %258 %260 %262 %264
+OpExecutionMode %54 OriginUpperLeft
+OpExecutionMode %136 OriginUpperLeft
+OpExecutionMode %199 OriginUpperLeft
+OpExecutionMode %265 OriginUpperLeft
+%3 = OpString "f16-native.wgsl"
+OpSource Unknown 0 %3 "enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
+"
+OpMemberName %12 0 "scalar_f16"
+OpMemberName %12 1 "scalar_f32"
+OpMemberName %12 2 "vec2_f16"
+OpMemberName %12 3 "vec2_f32"
+OpMemberName %12 4 "vec3_f16"
+OpMemberName %12 5 "vec3_f32"
+OpMemberName %12 6 "vec4_f16"
+OpMemberName %12 7 "vec4_f32"
+OpName %12 "F16IO"
+OpName %14 "scalar_f16"
+OpName %17 "scalar_f32"
+OpName %20 "vec2_f16"
+OpName %23 "vec2_f32"
+OpName %26 "vec3_f16"
+OpName %29 "vec3_f32"
+OpName %32 "vec4_f16"
+OpName %35 "vec4_f32"
+OpName %38 "scalar_f16"
+OpName %40 "scalar_f32"
+OpName %42 "vec2_f16"
+OpName %44 "vec2_f32"
+OpName %46 "vec3_f16"
+OpName %48 "vec3_f32"
+OpName %50 "vec4_f16"
+OpName %52 "vec4_f32"
+OpName %54 "test_direct"
+OpName %64 "output"
+OpName %112 "scalar_f16"
+OpName %114 "scalar_f32"
+OpName %116 "vec2_f16"
+OpName %118 "vec2_f32"
+OpName %120 "vec3_f16"
+OpName %122 "vec3_f32"
+OpName %124 "vec4_f16"
+OpName %126 "vec4_f32"
+OpName %128 "scalar_f16"
+OpName %129 "scalar_f32"
+OpName %130 "vec2_f16"
+OpName %131 "vec2_f32"
+OpName %132 "vec3_f16"
+OpName %133 "vec3_f32"
+OpName %134 "vec4_f16"
+OpName %135 "vec4_f32"
+OpName %136 "test_struct"
+OpName %137 "output"
+OpName %175 "scalar_f16"
+OpName %177 "scalar_f32"
+OpName %179 "vec2_f16"
+OpName %181 "vec2_f32"
+OpName %183 "vec3_f16"
+OpName %185 "vec3_f32"
+OpName %187 "vec4_f16"
+OpName %189 "vec4_f32"
+OpName %191 "scalar_f16"
+OpName %192 "scalar_f32"
+OpName %193 "vec2_f16"
+OpName %194 "vec2_f32"
+OpName %195 "vec3_f16"
+OpName %196 "vec3_f32"
+OpName %197 "vec4_f16"
+OpName %198 "vec4_f32"
+OpName %199 "test_copy_input"
+OpName %200 "input"
+OpName %202 "output"
+OpName %248 "scalar_f16"
+OpName %250 "scalar_f32"
+OpName %252 "vec2_f16"
+OpName %254 "vec2_f32"
+OpName %256 "vec3_f16"
+OpName %258 "vec3_f32"
+OpName %260 "vec4_f16"
+OpName %262 "vec4_f32"
+OpName %265 "test_return_partial"
+OpName %267 "input"
+OpMemberDecorate %12 0 Offset 0
+OpMemberDecorate %12 1 Offset 4
+OpMemberDecorate %12 2 Offset 8
+OpMemberDecorate %12 3 Offset 16
+OpMemberDecorate %12 4 Offset 24
+OpMemberDecorate %12 5 Offset 32
+OpMemberDecorate %12 6 Offset 48
+OpMemberDecorate %12 7 Offset 64
+OpDecorate %14 Location 0
+OpDecorate %17 Location 1
+OpDecorate %20 Location 2
+OpDecorate %23 Location 3
+OpDecorate %26 Location 4
+OpDecorate %29 Location 5
+OpDecorate %32 Location 6
+OpDecorate %35 Location 7
+OpDecorate %38 Location 0
+OpDecorate %40 Location 1
+OpDecorate %42 Location 2
+OpDecorate %44 Location 3
+OpDecorate %46 Location 4
+OpDecorate %48 Location 5
+OpDecorate %50 Location 6
+OpDecorate %52 Location 7
+OpDecorate %112 Location 0
+OpDecorate %114 Location 1
+OpDecorate %116 Location 2
+OpDecorate %118 Location 3
+OpDecorate %120 Location 4
+OpDecorate %122 Location 5
+OpDecorate %124 Location 6
+OpDecorate %126 Location 7
+OpDecorate %128 Location 0
+OpDecorate %129 Location 1
+OpDecorate %130 Location 2
+OpDecorate %131 Location 3
+OpDecorate %132 Location 4
+OpDecorate %133 Location 5
+OpDecorate %134 Location 6
+OpDecorate %135 Location 7
+OpDecorate %175 Location 0
+OpDecorate %177 Location 1
+OpDecorate %179 Location 2
+OpDecorate %181 Location 3
+OpDecorate %183 Location 4
+OpDecorate %185 Location 5
+OpDecorate %187 Location 6
+OpDecorate %189 Location 7
+OpDecorate %191 Location 0
+OpDecorate %192 Location 1
+OpDecorate %193 Location 2
+OpDecorate %194 Location 3
+OpDecorate %195 Location 4
+OpDecorate %196 Location 5
+OpDecorate %197 Location 6
+OpDecorate %198 Location 7
+OpDecorate %248 Location 0
+OpDecorate %250 Location 1
+OpDecorate %252 Location 2
+OpDecorate %254 Location 3
+OpDecorate %256 Location 4
+OpDecorate %258 Location 5
+OpDecorate %260 Location 6
+OpDecorate %262 Location 7
+OpDecorate %264 Location 0
+%2 = OpTypeVoid
+%4 = OpTypeFloat 16
+%5 = OpTypeFloat 32
+%6 = OpTypeVector %4 2
+%7 = OpTypeVector %5 2
+%8 = OpTypeVector %4 3
+%9 = OpTypeVector %5 3
+%10 = OpTypeVector %4 4
+%11 = OpTypeVector %5 4
+%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11
+%15 = OpTypePointer Input %4
+%14 = OpVariable  %15  Input
+%18 = OpTypePointer Input %5
+%17 = OpVariable  %18  Input
+%21 = OpTypePointer Input %6
+%20 = OpVariable  %21  Input
+%24 = OpTypePointer Input %7
+%23 = OpVariable  %24  Input
+%27 = OpTypePointer Input %8
+%26 = OpVariable  %27  Input
+%30 = OpTypePointer Input %9
+%29 = OpVariable  %30  Input
+%33 = OpTypePointer Input %10
+%32 = OpVariable  %33  Input
+%36 = OpTypePointer Input %11
+%35 = OpVariable  %36  Input
+%39 = OpTypePointer Output %4
+%38 = OpVariable  %39  Output
+%41 = OpTypePointer Output %5
+%40 = OpVariable  %41  Output
+%43 = OpTypePointer Output %6
+%42 = OpVariable  %43  Output
+%45 = OpTypePointer Output %7
+%44 = OpVariable  %45  Output
+%47 = OpTypePointer Output %8
+%46 = OpVariable  %47  Output
+%49 = OpTypePointer Output %9
+%48 = OpVariable  %49  Output
+%51 = OpTypePointer Output %10
+%50 = OpVariable  %51  Output
+%53 = OpTypePointer Output %11
+%52 = OpVariable  %53  Output
+%55 = OpTypeFunction %2
+%56 = OpConstant  %4  0.000000000000000000000000000000000000000021524
+%57 = OpConstant  %5  1
+%58 = OpConstantComposite  %6  %56 %56
+%59 = OpConstantComposite  %7  %57 %57
+%60 = OpConstantComposite  %8  %56 %56 %56
+%61 = OpConstantComposite  %9  %57 %57 %57
+%62 = OpConstantComposite  %10  %56 %56 %56 %56
+%63 = OpConstantComposite  %11  %57 %57 %57 %57
+%65 = OpTypePointer Function %12
+%66 = OpConstantNull  %12
+%68 = OpTypePointer Function %4
+%71 = OpTypeInt 32 0
+%70 = OpConstant  %71  0
+%73 = OpTypePointer Function %5
+%75 = OpConstant  %71  1
+%77 = OpTypePointer Function %6
+%79 = OpConstant  %71  2
+%81 = OpTypePointer Function %7
+%83 = OpConstant  %71  3
+%85 = OpTypePointer Function %8
+%87 = OpConstant  %71  4
+%89 = OpTypePointer Function %9
+%91 = OpConstant  %71  5
+%93 = OpTypePointer Function %10
+%95 = OpConstant  %71  6
+%97 = OpTypePointer Function %11
+%99 = OpConstant  %71  7
+%112 = OpVariable  %15  Input
+%114 = OpVariable  %18  Input
+%116 = OpVariable  %21  Input
+%118 = OpVariable  %24  Input
+%120 = OpVariable  %27  Input
+%122 = OpVariable  %30  Input
+%124 = OpVariable  %33  Input
+%126 = OpVariable  %36  Input
+%128 = OpVariable  %39  Output
+%129 = OpVariable  %41  Output
+%130 = OpVariable  %43  Output
+%131 = OpVariable  %45  Output
+%132 = OpVariable  %47  Output
+%133 = OpVariable  %49  Output
+%134 = OpVariable  %51  Output
+%135 = OpVariable  %53  Output
+%138 = OpConstantNull  %12
+%175 = OpVariable  %15  Input
+%177 = OpVariable  %18  Input
+%179 = OpVariable  %21  Input
+%181 = OpVariable  %24  Input
+%183 = OpVariable  %27  Input
+%185 = OpVariable  %30  Input
+%187 = OpVariable  %33  Input
+%189 = OpVariable  %36  Input
+%191 = OpVariable  %39  Output
+%192 = OpVariable  %41  Output
+%193 = OpVariable  %43  Output
+%194 = OpVariable  %45  Output
+%195 = OpVariable  %47  Output
+%196 = OpVariable  %49  Output
+%197 = OpVariable  %51  Output
+%198 = OpVariable  %53  Output
+%201 = OpConstantNull  %12
+%203 = OpConstantNull  %12
+%248 = OpVariable  %15  Input
+%250 = OpVariable  %18  Input
+%252 = OpVariable  %21  Input
+%254 = OpVariable  %24  Input
+%256 = OpVariable  %27  Input
+%258 = OpVariable  %30  Input
+%260 = OpVariable  %33  Input
+%262 = OpVariable  %36  Input
+%264 = OpVariable  %39  Output
+%266 = OpConstant  %4  0
+%268 = OpConstantNull  %12
+%54 = OpFunction  %2  None %55
+%13 = OpLabel
+%64 = OpVariable  %65  Function %66
+%16 = OpLoad  %4  %14
+%19 = OpLoad  %5  %17
+%22 = OpLoad  %6  %20
+%25 = OpLoad  %7  %23
+%28 = OpLoad  %8  %26
+%31 = OpLoad  %9  %29
+%34 = OpLoad  %10  %32
+%37 = OpLoad  %11  %35
+OpBranch %67
+%67 = OpLabel
+OpLine %3 15 5
+OpLine %3 15 25
+%69 = OpFAdd  %4  %16 %56
+OpLine %3 15 5
+%72 = OpAccessChain  %68  %64 %70
+OpStore %72 %69
+OpLine %3 16 5
+OpLine %3 16 25
+%74 = OpFAdd  %5  %19 %57
+OpLine %3 16 5
+%76 = OpAccessChain  %73  %64 %75
+OpStore %76 %74
+OpLine %3 17 5
+OpLine %3 17 23
+%78 = OpFAdd  %6  %22 %58
+OpLine %3 17 5
+%80 = OpAccessChain  %77  %64 %79
+OpStore %80 %78
+OpLine %3 18 5
+OpLine %3 18 34
+OpLine %3 18 23
+%82 = OpFAdd  %7  %25 %59
+OpLine %3 18 5
+%84 = OpAccessChain  %81  %64 %83
+OpStore %84 %82
+OpLine %3 19 5
+OpLine %3 19 23
+%86 = OpFAdd  %8  %28 %60
+OpLine %3 19 5
+%88 = OpAccessChain  %85  %64 %87
+OpStore %88 %86
+OpLine %3 20 5
+OpLine %3 20 34
+OpLine %3 20 23
+%90 = OpFAdd  %9  %31 %61
+OpLine %3 20 5
+%92 = OpAccessChain  %89  %64 %91
+OpStore %92 %90
+OpLine %3 21 5
+OpLine %3 21 23
+%94 = OpFAdd  %10  %34 %62
+OpLine %3 21 5
+%96 = OpAccessChain  %93  %64 %95
+OpStore %96 %94
+OpLine %3 22 5
+OpLine %3 22 34
+OpLine %3 22 23
+%98 = OpFAdd  %11  %37 %63
+OpLine %3 22 5
+%100 = OpAccessChain  %97  %64 %99
+OpStore %100 %98
+OpLine %3 1 1
+%101 = OpLoad  %12  %64
+%102 = OpCompositeExtract  %4  %101 0
+OpStore %38 %102
+%103 = OpCompositeExtract  %5  %101 1
+OpStore %40 %103
+%104 = OpCompositeExtract  %6  %101 2
+OpStore %42 %104
+%105 = OpCompositeExtract  %7  %101 3
+OpStore %44 %105
+%106 = OpCompositeExtract  %8  %101 4
+OpStore %46 %106
+%107 = OpCompositeExtract  %9  %101 5
+OpStore %48 %107
+%108 = OpCompositeExtract  %10  %101 6
+OpStore %50 %108
+%109 = OpCompositeExtract  %11  %101 7
+OpStore %52 %109
+OpReturn
+OpFunctionEnd
+%136 = OpFunction  %2  None %55
+%110 = OpLabel
+%137 = OpVariable  %65  Function %138
+%113 = OpLoad  %4  %112
+%115 = OpLoad  %5  %114
+%117 = OpLoad  %6  %116
+%119 = OpLoad  %7  %118
+%121 = OpLoad  %8  %120
+%123 = OpLoad  %9  %122
+%125 = OpLoad  %10  %124
+%127 = OpLoad  %11  %126
+%111 = OpCompositeConstruct  %12  %113 %115 %117 %119 %121 %123 %125 %127
+OpBranch %139
+%139 = OpLabel
+OpLine %3 40 5
+%140 = OpCompositeExtract  %4  %111 0
+OpLine %3 40 25
+%141 = OpFAdd  %4  %140 %56
+OpLine %3 40 5
+%142 = OpAccessChain  %68  %137 %70
+OpStore %142 %141
+OpLine %3 41 5
+%143 = OpCompositeExtract  %5  %111 1
+OpLine %3 41 25
+%144 = OpFAdd  %5  %143 %57
+OpLine %3 41 5
+%145 = OpAccessChain  %73  %137 %75
+OpStore %145 %144
+OpLine %3 42 5
+%146 = OpCompositeExtract  %6  %111 2
+OpLine %3 42 23
+%147 = OpFAdd  %6  %146 %58
+OpLine %3 42 5
+%148 = OpAccessChain  %77  %137 %79
+OpStore %148 %147
+OpLine %3 43 5
+%149 = OpCompositeExtract  %7  %111 3
+OpLine %3 43 40
+OpLine %3 43 23
+%150 = OpFAdd  %7  %149 %59
+OpLine %3 43 5
+%151 = OpAccessChain  %81  %137 %83
+OpStore %151 %150
+OpLine %3 44 5
+%152 = OpCompositeExtract  %8  %111 4
+OpLine %3 44 23
+%153 = OpFAdd  %8  %152 %60
+OpLine %3 44 5
+%154 = OpAccessChain  %85  %137 %87
+OpStore %154 %153
+OpLine %3 45 5
+%155 = OpCompositeExtract  %9  %111 5
+OpLine %3 45 40
+OpLine %3 45 23
+%156 = OpFAdd  %9  %155 %61
+OpLine %3 45 5
+%157 = OpAccessChain  %89  %137 %91
+OpStore %157 %156
+OpLine %3 46 5
+%158 = OpCompositeExtract  %10  %111 6
+OpLine %3 46 23
+%159 = OpFAdd  %10  %158 %62
+OpLine %3 46 5
+%160 = OpAccessChain  %93  %137 %95
+OpStore %160 %159
+OpLine %3 47 5
+%161 = OpCompositeExtract  %11  %111 7
+OpLine %3 47 40
+OpLine %3 47 23
+%162 = OpFAdd  %11  %161 %63
+OpLine %3 47 5
+%163 = OpAccessChain  %97  %137 %99
+OpStore %163 %162
+OpLine %3 1 1
+%164 = OpLoad  %12  %137
+%165 = OpCompositeExtract  %4  %164 0
+OpStore %128 %165
+%166 = OpCompositeExtract  %5  %164 1
+OpStore %129 %166
+%167 = OpCompositeExtract  %6  %164 2
+OpStore %130 %167
+%168 = OpCompositeExtract  %7  %164 3
+OpStore %131 %168
+%169 = OpCompositeExtract  %8  %164 4
+OpStore %132 %169
+%170 = OpCompositeExtract  %9  %164 5
+OpStore %133 %170
+%171 = OpCompositeExtract  %10  %164 6
+OpStore %134 %171
+%172 = OpCompositeExtract  %11  %164 7
+OpStore %135 %172
+OpReturn
+OpFunctionEnd
+%199 = OpFunction  %2  None %55
+%173 = OpLabel
+%200 = OpVariable  %65  Function %201
+%202 = OpVariable  %65  Function %203
+%176 = OpLoad  %4  %175
+%178 = OpLoad  %5  %177
+%180 = OpLoad  %6  %179
+%182 = OpLoad  %7  %181
+%184 = OpLoad  %8  %183
+%186 = OpLoad  %9  %185
+%188 = OpLoad  %10  %187
+%190 = OpLoad  %11  %189
+%174 = OpCompositeConstruct  %12  %176 %178 %180 %182 %184 %186 %188 %190
+OpBranch %204
+%204 = OpLabel
+OpLine %3 53 5
+OpStore %200 %174
+OpLine %3 55 5
+%205 = OpAccessChain  %68  %200 %70
+%206 = OpLoad  %4  %205
+OpLine %3 55 25
+%207 = OpFAdd  %4  %206 %56
+OpLine %3 55 5
+%208 = OpAccessChain  %68  %202 %70
+OpStore %208 %207
+OpLine %3 56 5
+%209 = OpAccessChain  %73  %200 %75
+%210 = OpLoad  %5  %209
+OpLine %3 56 25
+%211 = OpFAdd  %5  %210 %57
+OpLine %3 56 5
+%212 = OpAccessChain  %73  %202 %75
+OpStore %212 %211
+OpLine %3 57 5
+%213 = OpAccessChain  %77  %200 %79
+%214 = OpLoad  %6  %213
+OpLine %3 57 23
+%215 = OpFAdd  %6  %214 %58
+OpLine %3 57 5
+%216 = OpAccessChain  %77  %202 %79
+OpStore %216 %215
+OpLine %3 58 5
+%217 = OpAccessChain  %81  %200 %83
+%218 = OpLoad  %7  %217
+OpLine %3 58 40
+OpLine %3 58 23
+%219 = OpFAdd  %7  %218 %59
+OpLine %3 58 5
+%220 = OpAccessChain  %81  %202 %83
+OpStore %220 %219
+OpLine %3 59 5
+%221 = OpAccessChain  %85  %200 %87
+%222 = OpLoad  %8  %221
+OpLine %3 59 23
+%223 = OpFAdd  %8  %222 %60
+OpLine %3 59 5
+%224 = OpAccessChain  %85  %202 %87
+OpStore %224 %223
+OpLine %3 60 5
+%225 = OpAccessChain  %89  %200 %91
+%226 = OpLoad  %9  %225
+OpLine %3 60 40
+OpLine %3 60 23
+%227 = OpFAdd  %9  %226 %61
+OpLine %3 60 5
+%228 = OpAccessChain  %89  %202 %91
+OpStore %228 %227
+OpLine %3 61 5
+%229 = OpAccessChain  %93  %200 %95
+%230 = OpLoad  %10  %229
+OpLine %3 61 23
+%231 = OpFAdd  %10  %230 %62
+OpLine %3 61 5
+%232 = OpAccessChain  %93  %202 %95
+OpStore %232 %231
+OpLine %3 62 5
+%233 = OpAccessChain  %97  %200 %99
+%234 = OpLoad  %11  %233
+OpLine %3 62 40
+OpLine %3 62 23
+%235 = OpFAdd  %11  %234 %63
+OpLine %3 62 5
+%236 = OpAccessChain  %97  %202 %99
+OpStore %236 %235
+OpLine %3 1 1
+%237 = OpLoad  %12  %202
+%238 = OpCompositeExtract  %4  %237 0
+OpStore %191 %238
+%239 = OpCompositeExtract  %5  %237 1
+OpStore %192 %239
+%240 = OpCompositeExtract  %6  %237 2
+OpStore %193 %240
+%241 = OpCompositeExtract  %7  %237 3
+OpStore %194 %241
+%242 = OpCompositeExtract  %8  %237 4
+OpStore %195 %242
+%243 = OpCompositeExtract  %9  %237 5
+OpStore %196 %243
+%244 = OpCompositeExtract  %10  %237 6
+OpStore %197 %244
+%245 = OpCompositeExtract  %11  %237 7
+OpStore %198 %245
+OpReturn
+OpFunctionEnd
+%265 = OpFunction  %2  None %55
+%246 = OpLabel
+%267 = OpVariable  %65  Function %268
+%249 = OpLoad  %4  %248
+%251 = OpLoad  %5  %250
+%253 = OpLoad  %6  %252
+%255 = OpLoad  %7  %254
+%257 = OpLoad  %8  %256
+%259 = OpLoad  %9  %258
+%261 = OpLoad  %10  %260
+%263 = OpLoad  %11  %262
+%247 = OpCompositeConstruct  %12  %249 %251 %253 %255 %257 %259 %261 %263
+OpBranch %269
+%269 = OpLabel
+OpLine %3 68 5
+OpStore %267 %247
+OpLine %3 69 5
+OpLine %3 69 5
+%270 = OpAccessChain  %68  %267 %70
+OpStore %270 %266
+OpLine %3 70 12
+%271 = OpAccessChain  %68  %267 %70
+%272 = OpLoad  %4  %271
+OpStore %264 %272
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
new file mode 100644
index 00000000000..8b6b111750f
--- /dev/null
+++ b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
@@ -0,0 +1,675 @@
+; SPIR-V
+; Version: 1.1
+; Generator: rspirv
+; Bound: 294
+OpCapability Shader
+OpCapability Float16
+OpCapability StorageBuffer16BitAccess
+OpCapability UniformAndStorageBuffer16BitAccess
+OpExtension "SPV_KHR_16bit_storage"
+%1 = OpExtInstImport "GLSL.std.450"
+OpMemoryModel Logical GLSL450
+OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40 %41 %43 %44 %46 %47 %49
+OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139
+OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210
+OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284
+OpExecutionMode %50 OriginUpperLeft
+OpExecutionMode %140 OriginUpperLeft
+OpExecutionMode %211 OriginUpperLeft
+OpExecutionMode %285 OriginUpperLeft
+%3 = OpString "f16-polyfill.wgsl"
+OpSource Unknown 0 %3 "enable f16;
+
+@fragment
+fn test_direct(
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = scalar_f16 + 1.0h;
+    output.scalar_f32 = scalar_f32 + 1.0;
+    output.vec2_f16 = vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = vec2_f32 + vec2(1.0);
+    output.vec3_f16 = vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = vec3_f32 + vec3(1.0);
+    output.vec4_f16 = vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = vec4_f32 + vec4(1.0);
+    return output;
+}
+
+struct F16IO {
+    @location(0) scalar_f16: f16,
+    @location(1) scalar_f32: f32,
+    @location(2) vec2_f16: vec2<f16>,
+    @location(3) vec2_f32: vec2<f32>,
+    @location(4) vec3_f16: vec3<f16>,
+    @location(5) vec3_f32: vec3<f32>,
+    @location(6) vec4_f16: vec4<f16>,
+    @location(7) vec4_f32: vec4<f32>,
+}
+
+@fragment
+fn test_struct(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_copy_input(input_original: F16IO) -> F16IO {
+    var input = input_original;
+    var output: F16IO;
+    output.scalar_f16 = input.scalar_f16 + 1.0h;
+    output.scalar_f32 = input.scalar_f32 + 1.0;
+    output.vec2_f16 = input.vec2_f16 + vec2(1.0h);
+    output.vec2_f32 = input.vec2_f32 + vec2(1.0);
+    output.vec3_f16 = input.vec3_f16 + vec3(1.0h);
+    output.vec3_f32 = input.vec3_f32 + vec3(1.0);
+    output.vec4_f16 = input.vec4_f16 + vec4(1.0h);
+    output.vec4_f32 = input.vec4_f32 + vec4(1.0);
+    return output;
+}
+
+@fragment
+fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
+    var input = input_original;
+    input.scalar_f16 = 0.0h;
+    return input.scalar_f16;
+}
+"
+OpMemberName %12 0 "scalar_f16"
+OpMemberName %12 1 "scalar_f32"
+OpMemberName %12 2 "vec2_f16"
+OpMemberName %12 3 "vec2_f32"
+OpMemberName %12 4 "vec3_f16"
+OpMemberName %12 5 "vec3_f32"
+OpMemberName %12 6 "vec4_f16"
+OpMemberName %12 7 "vec4_f32"
+OpName %12 "F16IO"
+OpName %14 "scalar_f16"
+OpName %18 "scalar_f32"
+OpName %20 "vec2_f16"
+OpName %24 "vec2_f32"
+OpName %26 "vec3_f16"
+OpName %30 "vec3_f32"
+OpName %32 "vec4_f16"
+OpName %36 "vec4_f32"
+OpName %38 "scalar_f16"
+OpName %40 "scalar_f32"
+OpName %41 "vec2_f16"
+OpName %43 "vec2_f32"
+OpName %44 "vec3_f16"
+OpName %46 "vec3_f32"
+OpName %47 "vec4_f16"
+OpName %49 "vec4_f32"
+OpName %50 "test_direct"
+OpName %60 "output"
+OpName %112 "scalar_f16"
+OpName %115 "scalar_f32"
+OpName %117 "vec2_f16"
+OpName %120 "vec2_f32"
+OpName %122 "vec3_f16"
+OpName %125 "vec3_f32"
+OpName %127 "vec4_f16"
+OpName %130 "vec4_f32"
+OpName %132 "scalar_f16"
+OpName %133 "scalar_f32"
+OpName %134 "vec2_f16"
+OpName %135 "vec2_f32"
+OpName %136 "vec3_f16"
+OpName %137 "vec3_f32"
+OpName %138 "vec4_f16"
+OpName %139 "vec4_f32"
+OpName %140 "test_struct"
+OpName %141 "output"
+OpName %183 "scalar_f16"
+OpName %186 "scalar_f32"
+OpName %188 "vec2_f16"
+OpName %191 "vec2_f32"
+OpName %193 "vec3_f16"
+OpName %196 "vec3_f32"
+OpName %198 "vec4_f16"
+OpName %201 "vec4_f32"
+OpName %203 "scalar_f16"
+OpName %204 "scalar_f32"
+OpName %205 "vec2_f16"
+OpName %206 "vec2_f32"
+OpName %207 "vec3_f16"
+OpName %208 "vec3_f32"
+OpName %209 "vec4_f16"
+OpName %210 "vec4_f32"
+OpName %211 "test_copy_input"
+OpName %212 "input"
+OpName %214 "output"
+OpName %264 "scalar_f16"
+OpName %267 "scalar_f32"
+OpName %269 "vec2_f16"
+OpName %272 "vec2_f32"
+OpName %274 "vec3_f16"
+OpName %277 "vec3_f32"
+OpName %279 "vec4_f16"
+OpName %282 "vec4_f32"
+OpName %285 "test_return_partial"
+OpName %287 "input"
+OpMemberDecorate %12 0 Offset 0
+OpMemberDecorate %12 1 Offset 4
+OpMemberDecorate %12 2 Offset 8
+OpMemberDecorate %12 3 Offset 16
+OpMemberDecorate %12 4 Offset 24
+OpMemberDecorate %12 5 Offset 32
+OpMemberDecorate %12 6 Offset 48
+OpMemberDecorate %12 7 Offset 64
+OpDecorate %14 Location 0
+OpDecorate %18 Location 1
+OpDecorate %20 Location 2
+OpDecorate %24 Location 3
+OpDecorate %26 Location 4
+OpDecorate %30 Location 5
+OpDecorate %32 Location 6
+OpDecorate %36 Location 7
+OpDecorate %38 Location 0
+OpDecorate %40 Location 1
+OpDecorate %41 Location 2
+OpDecorate %43 Location 3
+OpDecorate %44 Location 4
+OpDecorate %46 Location 5
+OpDecorate %47 Location 6
+OpDecorate %49 Location 7
+OpDecorate %112 Location 0
+OpDecorate %115 Location 1
+OpDecorate %117 Location 2
+OpDecorate %120 Location 3
+OpDecorate %122 Location 4
+OpDecorate %125 Location 5
+OpDecorate %127 Location 6
+OpDecorate %130 Location 7
+OpDecorate %132 Location 0
+OpDecorate %133 Location 1
+OpDecorate %134 Location 2
+OpDecorate %135 Location 3
+OpDecorate %136 Location 4
+OpDecorate %137 Location 5
+OpDecorate %138 Location 6
+OpDecorate %139 Location 7
+OpDecorate %183 Location 0
+OpDecorate %186 Location 1
+OpDecorate %188 Location 2
+OpDecorate %191 Location 3
+OpDecorate %193 Location 4
+OpDecorate %196 Location 5
+OpDecorate %198 Location 6
+OpDecorate %201 Location 7
+OpDecorate %203 Location 0
+OpDecorate %204 Location 1
+OpDecorate %205 Location 2
+OpDecorate %206 Location 3
+OpDecorate %207 Location 4
+OpDecorate %208 Location 5
+OpDecorate %209 Location 6
+OpDecorate %210 Location 7
+OpDecorate %264 Location 0
+OpDecorate %267 Location 1
+OpDecorate %269 Location 2
+OpDecorate %272 Location 3
+OpDecorate %274 Location 4
+OpDecorate %277 Location 5
+OpDecorate %279 Location 6
+OpDecorate %282 Location 7
+OpDecorate %284 Location 0
+%2 = OpTypeVoid
+%4 = OpTypeFloat 16
+%5 = OpTypeFloat 32
+%6 = OpTypeVector %4 2
+%7 = OpTypeVector %5 2
+%8 = OpTypeVector %4 3
+%9 = OpTypeVector %5 3
+%10 = OpTypeVector %4 4
+%11 = OpTypeVector %5 4
+%12 = OpTypeStruct %4 %5 %6 %7 %8 %9 %10 %11
+%15 = OpTypePointer Input %5
+%14 = OpVariable  %15  Input
+%18 = OpVariable  %15  Input
+%21 = OpTypePointer Input %7
+%20 = OpVariable  %21  Input
+%24 = OpVariable  %21  Input
+%27 = OpTypePointer Input %9
+%26 = OpVariable  %27  Input
+%30 = OpVariable  %27  Input
+%33 = OpTypePointer Input %11
+%32 = OpVariable  %33  Input
+%36 = OpVariable  %33  Input
+%39 = OpTypePointer Output %5
+%38 = OpVariable  %39  Output
+%40 = OpVariable  %39  Output
+%42 = OpTypePointer Output %7
+%41 = OpVariable  %42  Output
+%43 = OpVariable  %42  Output
+%45 = OpTypePointer Output %9
+%44 = OpVariable  %45  Output
+%46 = OpVariable  %45  Output
+%48 = OpTypePointer Output %11
+%47 = OpVariable  %48  Output
+%49 = OpVariable  %48  Output
+%51 = OpTypeFunction %2
+%52 = OpConstant  %4  0.000000000000000000000000000000000000000021524
+%53 = OpConstant  %5  1
+%54 = OpConstantComposite  %6  %52 %52
+%55 = OpConstantComposite  %7  %53 %53
+%56 = OpConstantComposite  %8  %52 %52 %52
+%57 = OpConstantComposite  %9  %53 %53 %53
+%58 = OpConstantComposite  %10  %52 %52 %52 %52
+%59 = OpConstantComposite  %11  %53 %53 %53 %53
+%61 = OpTypePointer Function %12
+%62 = OpConstantNull  %12
+%64 = OpTypePointer Function %4
+%67 = OpTypeInt 32 0
+%66 = OpConstant  %67  0
+%69 = OpTypePointer Function %5
+%71 = OpConstant  %67  1
+%73 = OpTypePointer Function %6
+%75 = OpConstant  %67  2
+%77 = OpTypePointer Function %7
+%79 = OpConstant  %67  3
+%81 = OpTypePointer Function %8
+%83 = OpConstant  %67  4
+%85 = OpTypePointer Function %9
+%87 = OpConstant  %67  5
+%89 = OpTypePointer Function %10
+%91 = OpConstant  %67  6
+%93 = OpTypePointer Function %11
+%95 = OpConstant  %67  7
+%112 = OpVariable  %15  Input
+%115 = OpVariable  %15  Input
+%117 = OpVariable  %21  Input
+%120 = OpVariable  %21  Input
+%122 = OpVariable  %27  Input
+%125 = OpVariable  %27  Input
+%127 = OpVariable  %33  Input
+%130 = OpVariable  %33  Input
+%132 = OpVariable  %39  Output
+%133 = OpVariable  %39  Output
+%134 = OpVariable  %42  Output
+%135 = OpVariable  %42  Output
+%136 = OpVariable  %45  Output
+%137 = OpVariable  %45  Output
+%138 = OpVariable  %48  Output
+%139 = OpVariable  %48  Output
+%142 = OpConstantNull  %12
+%183 = OpVariable  %15  Input
+%186 = OpVariable  %15  Input
+%188 = OpVariable  %21  Input
+%191 = OpVariable  %21  Input
+%193 = OpVariable  %27  Input
+%196 = OpVariable  %27  Input
+%198 = OpVariable  %33  Input
+%201 = OpVariable  %33  Input
+%203 = OpVariable  %39  Output
+%204 = OpVariable  %39  Output
+%205 = OpVariable  %42  Output
+%206 = OpVariable  %42  Output
+%207 = OpVariable  %45  Output
+%208 = OpVariable  %45  Output
+%209 = OpVariable  %48  Output
+%210 = OpVariable  %48  Output
+%213 = OpConstantNull  %12
+%215 = OpConstantNull  %12
+%264 = OpVariable  %15  Input
+%267 = OpVariable  %15  Input
+%269 = OpVariable  %21  Input
+%272 = OpVariable  %21  Input
+%274 = OpVariable  %27  Input
+%277 = OpVariable  %27  Input
+%279 = OpVariable  %33  Input
+%282 = OpVariable  %33  Input
+%284 = OpVariable  %39  Output
+%286 = OpConstant  %4  0
+%288 = OpConstantNull  %12
+%50 = OpFunction  %2  None %51
+%13 = OpLabel
+%60 = OpVariable  %61  Function %62
+%16 = OpLoad  %5  %14
+%17 = OpFConvert  %4  %16
+%19 = OpLoad  %5  %18
+%22 = OpLoad  %7  %20
+%23 = OpFConvert  %6  %22
+%25 = OpLoad  %7  %24
+%28 = OpLoad  %9  %26
+%29 = OpFConvert  %8  %28
+%31 = OpLoad  %9  %30
+%34 = OpLoad  %11  %32
+%35 = OpFConvert  %10  %34
+%37 = OpLoad  %11  %36
+OpBranch %63
+%63 = OpLabel
+OpLine %3 15 5
+OpLine %3 15 25
+%65 = OpFAdd  %4  %17 %52
+OpLine %3 15 5
+%68 = OpAccessChain  %64  %60 %66
+OpStore %68 %65
+OpLine %3 16 5
+OpLine %3 16 25
+%70 = OpFAdd  %5  %19 %53
+OpLine %3 16 5
+%72 = OpAccessChain  %69  %60 %71
+OpStore %72 %70
+OpLine %3 17 5
+OpLine %3 17 23
+%74 = OpFAdd  %6  %23 %54
+OpLine %3 17 5
+%76 = OpAccessChain  %73  %60 %75
+OpStore %76 %74
+OpLine %3 18 5
+OpLine %3 18 34
+OpLine %3 18 23
+%78 = OpFAdd  %7  %25 %55
+OpLine %3 18 5
+%80 = OpAccessChain  %77  %60 %79
+OpStore %80 %78
+OpLine %3 19 5
+OpLine %3 19 23
+%82 = OpFAdd  %8  %29 %56
+OpLine %3 19 5
+%84 = OpAccessChain  %81  %60 %83
+OpStore %84 %82
+OpLine %3 20 5
+OpLine %3 20 34
+OpLine %3 20 23
+%86 = OpFAdd  %9  %31 %57
+OpLine %3 20 5
+%88 = OpAccessChain  %85  %60 %87
+OpStore %88 %86
+OpLine %3 21 5
+OpLine %3 21 23
+%90 = OpFAdd  %10  %35 %58
+OpLine %3 21 5
+%92 = OpAccessChain  %89  %60 %91
+OpStore %92 %90
+OpLine %3 22 5
+OpLine %3 22 34
+OpLine %3 22 23
+%94 = OpFAdd  %11  %37 %59
+OpLine %3 22 5
+%96 = OpAccessChain  %93  %60 %95
+OpStore %96 %94
+OpLine %3 1 1
+%97 = OpLoad  %12  %60
+%98 = OpCompositeExtract  %4  %97 0
+%99 = OpFConvert  %5  %98
+OpStore %38 %99
+%100 = OpCompositeExtract  %5  %97 1
+OpStore %40 %100
+%101 = OpCompositeExtract  %6  %97 2
+%102 = OpFConvert  %7  %101
+OpStore %41 %102
+%103 = OpCompositeExtract  %7  %97 3
+OpStore %43 %103
+%104 = OpCompositeExtract  %8  %97 4
+%105 = OpFConvert  %9  %104
+OpStore %44 %105
+%106 = OpCompositeExtract  %9  %97 5
+OpStore %46 %106
+%107 = OpCompositeExtract  %10  %97 6
+%108 = OpFConvert  %11  %107
+OpStore %47 %108
+%109 = OpCompositeExtract  %11  %97 7
+OpStore %49 %109
+OpReturn
+OpFunctionEnd
+%140 = OpFunction  %2  None %51
+%110 = OpLabel
+%141 = OpVariable  %61  Function %142
+%113 = OpLoad  %5  %112
+%114 = OpFConvert  %4  %113
+%116 = OpLoad  %5  %115
+%118 = OpLoad  %7  %117
+%119 = OpFConvert  %6  %118
+%121 = OpLoad  %7  %120
+%123 = OpLoad  %9  %122
+%124 = OpFConvert  %8  %123
+%126 = OpLoad  %9  %125
+%128 = OpLoad  %11  %127
+%129 = OpFConvert  %10  %128
+%131 = OpLoad  %11  %130
+%111 = OpCompositeConstruct  %12  %114 %116 %119 %121 %124 %126 %129 %131
+OpBranch %143
+%143 = OpLabel
+OpLine %3 40 5
+%144 = OpCompositeExtract  %4  %111 0
+OpLine %3 40 25
+%145 = OpFAdd  %4  %144 %52
+OpLine %3 40 5
+%146 = OpAccessChain  %64  %141 %66
+OpStore %146 %145
+OpLine %3 41 5
+%147 = OpCompositeExtract  %5  %111 1
+OpLine %3 41 25
+%148 = OpFAdd  %5  %147 %53
+OpLine %3 41 5
+%149 = OpAccessChain  %69  %141 %71
+OpStore %149 %148
+OpLine %3 42 5
+%150 = OpCompositeExtract  %6  %111 2
+OpLine %3 42 23
+%151 = OpFAdd  %6  %150 %54
+OpLine %3 42 5
+%152 = OpAccessChain  %73  %141 %75
+OpStore %152 %151
+OpLine %3 43 5
+%153 = OpCompositeExtract  %7  %111 3
+OpLine %3 43 40
+OpLine %3 43 23
+%154 = OpFAdd  %7  %153 %55
+OpLine %3 43 5
+%155 = OpAccessChain  %77  %141 %79
+OpStore %155 %154
+OpLine %3 44 5
+%156 = OpCompositeExtract  %8  %111 4
+OpLine %3 44 23
+%157 = OpFAdd  %8  %156 %56
+OpLine %3 44 5
+%158 = OpAccessChain  %81  %141 %83
+OpStore %158 %157
+OpLine %3 45 5
+%159 = OpCompositeExtract  %9  %111 5
+OpLine %3 45 40
+OpLine %3 45 23
+%160 = OpFAdd  %9  %159 %57
+OpLine %3 45 5
+%161 = OpAccessChain  %85  %141 %87
+OpStore %161 %160
+OpLine %3 46 5
+%162 = OpCompositeExtract  %10  %111 6
+OpLine %3 46 23
+%163 = OpFAdd  %10  %162 %58
+OpLine %3 46 5
+%164 = OpAccessChain  %89  %141 %91
+OpStore %164 %163
+OpLine %3 47 5
+%165 = OpCompositeExtract  %11  %111 7
+OpLine %3 47 40
+OpLine %3 47 23
+%166 = OpFAdd  %11  %165 %59
+OpLine %3 47 5
+%167 = OpAccessChain  %93  %141 %95
+OpStore %167 %166
+OpLine %3 1 1
+%168 = OpLoad  %12  %141
+%169 = OpCompositeExtract  %4  %168 0
+%170 = OpFConvert  %5  %169
+OpStore %132 %170
+%171 = OpCompositeExtract  %5  %168 1
+OpStore %133 %171
+%172 = OpCompositeExtract  %6  %168 2
+%173 = OpFConvert  %7  %172
+OpStore %134 %173
+%174 = OpCompositeExtract  %7  %168 3
+OpStore %135 %174
+%175 = OpCompositeExtract  %8  %168 4
+%176 = OpFConvert  %9  %175
+OpStore %136 %176
+%177 = OpCompositeExtract  %9  %168 5
+OpStore %137 %177
+%178 = OpCompositeExtract  %10  %168 6
+%179 = OpFConvert  %11  %178
+OpStore %138 %179
+%180 = OpCompositeExtract  %11  %168 7
+OpStore %139 %180
+OpReturn
+OpFunctionEnd
+%211 = OpFunction  %2  None %51
+%181 = OpLabel
+%212 = OpVariable  %61  Function %213
+%214 = OpVariable  %61  Function %215
+%184 = OpLoad  %5  %183
+%185 = OpFConvert  %4  %184
+%187 = OpLoad  %5  %186
+%189 = OpLoad  %7  %188
+%190 = OpFConvert  %6  %189
+%192 = OpLoad  %7  %191
+%194 = OpLoad  %9  %193
+%195 = OpFConvert  %8  %194
+%197 = OpLoad  %9  %196
+%199 = OpLoad  %11  %198
+%200 = OpFConvert  %10  %199
+%202 = OpLoad  %11  %201
+%182 = OpCompositeConstruct  %12  %185 %187 %190 %192 %195 %197 %200 %202
+OpBranch %216
+%216 = OpLabel
+OpLine %3 53 5
+OpStore %212 %182
+OpLine %3 55 5
+%217 = OpAccessChain  %64  %212 %66
+%218 = OpLoad  %4  %217
+OpLine %3 55 25
+%219 = OpFAdd  %4  %218 %52
+OpLine %3 55 5
+%220 = OpAccessChain  %64  %214 %66
+OpStore %220 %219
+OpLine %3 56 5
+%221 = OpAccessChain  %69  %212 %71
+%222 = OpLoad  %5  %221
+OpLine %3 56 25
+%223 = OpFAdd  %5  %222 %53
+OpLine %3 56 5
+%224 = OpAccessChain  %69  %214 %71
+OpStore %224 %223
+OpLine %3 57 5
+%225 = OpAccessChain  %73  %212 %75
+%226 = OpLoad  %6  %225
+OpLine %3 57 23
+%227 = OpFAdd  %6  %226 %54
+OpLine %3 57 5
+%228 = OpAccessChain  %73  %214 %75
+OpStore %228 %227
+OpLine %3 58 5
+%229 = OpAccessChain  %77  %212 %79
+%230 = OpLoad  %7  %229
+OpLine %3 58 40
+OpLine %3 58 23
+%231 = OpFAdd  %7  %230 %55
+OpLine %3 58 5
+%232 = OpAccessChain  %77  %214 %79
+OpStore %232 %231
+OpLine %3 59 5
+%233 = OpAccessChain  %81  %212 %83
+%234 = OpLoad  %8  %233
+OpLine %3 59 23
+%235 = OpFAdd  %8  %234 %56
+OpLine %3 59 5
+%236 = OpAccessChain  %81  %214 %83
+OpStore %236 %235
+OpLine %3 60 5
+%237 = OpAccessChain  %85  %212 %87
+%238 = OpLoad  %9  %237
+OpLine %3 60 40
+OpLine %3 60 23
+%239 = OpFAdd  %9  %238 %57
+OpLine %3 60 5
+%240 = OpAccessChain  %85  %214 %87
+OpStore %240 %239
+OpLine %3 61 5
+%241 = OpAccessChain  %89  %212 %91
+%242 = OpLoad  %10  %241
+OpLine %3 61 23
+%243 = OpFAdd  %10  %242 %58
+OpLine %3 61 5
+%244 = OpAccessChain  %89  %214 %91
+OpStore %244 %243
+OpLine %3 62 5
+%245 = OpAccessChain  %93  %212 %95
+%246 = OpLoad  %11  %245
+OpLine %3 62 40
+OpLine %3 62 23
+%247 = OpFAdd  %11  %246 %59
+OpLine %3 62 5
+%248 = OpAccessChain  %93  %214 %95
+OpStore %248 %247
+OpLine %3 1 1
+%249 = OpLoad  %12  %214
+%250 = OpCompositeExtract  %4  %249 0
+%251 = OpFConvert  %5  %250
+OpStore %203 %251
+%252 = OpCompositeExtract  %5  %249 1
+OpStore %204 %252
+%253 = OpCompositeExtract  %6  %249 2
+%254 = OpFConvert  %7  %253
+OpStore %205 %254
+%255 = OpCompositeExtract  %7  %249 3
+OpStore %206 %255
+%256 = OpCompositeExtract  %8  %249 4
+%257 = OpFConvert  %9  %256
+OpStore %207 %257
+%258 = OpCompositeExtract  %9  %249 5
+OpStore %208 %258
+%259 = OpCompositeExtract  %10  %249 6
+%260 = OpFConvert  %11  %259
+OpStore %209 %260
+%261 = OpCompositeExtract  %11  %249 7
+OpStore %210 %261
+OpReturn
+OpFunctionEnd
+%285 = OpFunction  %2  None %51
+%262 = OpLabel
+%287 = OpVariable  %61  Function %288
+%265 = OpLoad  %5  %264
+%266 = OpFConvert  %4  %265
+%268 = OpLoad  %5  %267
+%270 = OpLoad  %7  %269
+%271 = OpFConvert  %6  %270
+%273 = OpLoad  %7  %272
+%275 = OpLoad  %9  %274
+%276 = OpFConvert  %8  %275
+%278 = OpLoad  %9  %277
+%280 = OpLoad  %11  %279
+%281 = OpFConvert  %10  %280
+%283 = OpLoad  %11  %282
+%263 = OpCompositeConstruct  %12  %266 %268 %271 %273 %276 %278 %281 %283
+OpBranch %289
+%289 = OpLabel
+OpLine %3 68 5
+OpStore %287 %263
+OpLine %3 69 5
+OpLine %3 69 5
+%290 = OpAccessChain  %64  %287 %66
+OpStore %290 %286
+OpLine %3 70 12
+%291 = OpAccessChain  %64  %287 %66
+%292 = OpLoad  %4  %291
+%293 = OpFConvert  %5  %292
+OpStore %284 %293
+OpReturn
+OpFunctionEnd
\ No newline at end of file
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 67428309541..8c3fa7d00dc 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -226,7 +226,7 @@ impl PhysicalDeviceFeatures {
     /// [`Adapter::required_device_extensions`]: super::Adapter::required_device_extensions
     fn from_extensions_and_requested_features(
         phd_capabilities: &PhysicalDeviceProperties,
-        _phd_features: &PhysicalDeviceFeatures,
+        phd_features: &PhysicalDeviceFeatures,
         enabled_extensions: &[&'static CStr],
         requested_features: wgt::Features,
         downlevel_flags: wgt::DownlevelFlags,
@@ -396,10 +396,17 @@ impl PhysicalDeviceFeatures {
                 _ => None,
             },
             _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) {
+                // Check if the device actually supports storage_input_output16
+                let storage_input_output16_supported = phd_features
+                    ._16bit_storage
+                    .as_ref()
+                    .map(|features| features.storage_input_output16 != 0)
+                    .unwrap_or(false);
+
                 Some(
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
-                        .storage_input_output16(true)
+                        .storage_input_output16(storage_input_output16_supported)
                         .uniform_and_storage_buffer16_bit_access(true),
                 )
             } else {
@@ -736,12 +743,12 @@ impl PhysicalDeviceFeatures {
 
         if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage)
         {
+            // Note storage_input_output16 is not required, we polyfill f16 I/O using f32 types when this capability is not available
             features.set(
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0
                     && bit16.storage_buffer16_bit_access != 0
-                    && bit16.uniform_and_storage_buffer16_bit_access != 0
-                    && bit16.storage_input_output16 != 0,
+                    && bit16.uniform_and_storage_buffer16_bit_access != 0,
             );
         }
 
@@ -2109,6 +2116,15 @@ impl super::Adapter {
                     spv::ZeroInitializeWorkgroupMemoryMode::Polyfill
                 },
                 force_loop_bounding: true,
+                use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) && {
+                    // Check if the device actually supports storage_input_output16
+                    let phd_features = self.physical_device_features(enabled_extensions, features);
+                    phd_features
+                        ._16bit_storage
+                        .as_ref()
+                        .map(|storage_features| storage_features.storage_input_output16 != 0)
+                        .unwrap_or(false)
+                },
                 // We need to build this separately for each invocation, so just default it out here
                 binding_map: BTreeMap::default(),
                 debug_info: None,

From e9d75c3663d97e4545007ea083f6f40477d06a35 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 16 Jul 2025 15:56:15 -0400
Subject: [PATCH 09/20] fixup! [naga spv-out] Add f16 io polyfill

---
 naga/src/back/spv/f16_polyfill.rs | 11 ++++++-----
 wgpu-hal/src/vulkan/adapter.rs    |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
index abcdb2092f2..e1f0e5bd224 100644
--- a/naga/src/back/spv/f16_polyfill.rs
+++ b/naga/src/back/spv/f16_polyfill.rs
@@ -1,17 +1,18 @@
 /*!
-This module provides functionality polyfills f16 input/output variables
-when the StorageInputOutput16 capability is not available or disabled.
+This module provides functionality polyfills `f16` input/output variables when the
+`StorageInputOutput16` capability is not available or disabled.
 
 It works by:
-1. Declaring f16 I/O variables as f32 in SPIR-V
-2. Converting between f16 and f32 at runtime using OpFConvert
+
+1. Declaring `f16` I/O variables as `f32` in SPIR-V
+2. Converting between `f16` and `f32` at runtime using `OpFConvert`
 3. Maintaining mappings to track which variables need conversion
 */
 
 use crate::back::spv::{Instruction, LocalType, NumericType, Word};
 use alloc::vec::Vec;
 
-/// Manages f16 I/O polyfill state and operations.
+/// Manages `f16` I/O polyfill state and operations.
 #[derive(Default)]
 pub(super) struct F16IoPolyfill {
     use_native: bool,
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 8c3fa7d00dc..49ecbad564b 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -743,7 +743,8 @@ impl PhysicalDeviceFeatures {
 
         if let (Some(ref f16_i8), Some(ref bit16)) = (self.shader_float16_int8, self._16bit_storage)
         {
-            // Note storage_input_output16 is not required, we polyfill f16 I/O using f32 types when this capability is not available
+            // Note `storage_input_output16` is not required, we polyfill `f16` I/O using `f32`
+            // types when this capability is not available
             features.set(
                 F::SHADER_F16,
                 f16_i8.shader_float16 != 0

From a8cddd9590b07ee0b04f683d42ae20beea7aa2b9 Mon Sep 17 00:00:00 2001
From: Erich Gubler <erichdongubler@gmail.com>
Date: Wed, 16 Jul 2025 17:34:48 -0400
Subject: [PATCH 10/20] fixup! [naga spv-out] Add f16 io polyfill

---
 naga/src/back/spv/f16_polyfill.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
index e1f0e5bd224..9ce6d0fe39f 100644
--- a/naga/src/back/spv/f16_polyfill.rs
+++ b/naga/src/back/spv/f16_polyfill.rs
@@ -14,7 +14,7 @@ use alloc::vec::Vec;
 
 /// Manages `f16` I/O polyfill state and operations.
 #[derive(Default)]
-pub(super) struct F16IoPolyfill {
+pub(in crate::back::spv) struct F16IoPolyfill {
     use_native: bool,
     variable_map: crate::FastHashMap<Word, (Word, Word)>,
 }

From c5e2ad36bb444fb2c28fcb221e8b13c5f9b34a25 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 19:53:12 -0400
Subject: [PATCH 11/20] Fix merge oops

---
 wgpu-hal/src/vulkan/adapter.rs | 48 +++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 49ecbad564b..2ff2a65ca50 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -815,7 +815,7 @@ impl PhysicalDeviceFeatures {
         features.set(
             F::EXPERIMENTAL_RAY_QUERY
             // Although this doesn't really require ray queries, it does not make sense to be enabled if acceleration structures
-            // aren't enabled. 
+            // aren't enabled.
                 | F::EXTENDED_ACCELERATION_STRUCTURE_VERTEX_FORMATS,
             supports_acceleration_structures && caps.supports_extension(khr::ray_query::NAME),
         );
@@ -940,7 +940,7 @@ pub struct PhysicalDeviceProperties {
 
     /// Additional `vk::PhysicalDevice` properties from the
     /// `VK_EXT_mesh_shader` extension.
-    _mesh_shader: Option<vk::PhysicalDeviceMeshShaderPropertiesEXT<'static>>,
+    mesh_shader: Option<vk::PhysicalDeviceMeshShaderPropertiesEXT<'static>>,
 
     /// The device API version.
     ///
@@ -1168,14 +1168,29 @@ impl PhysicalDeviceProperties {
         let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0]
             .min(limits.max_compute_work_group_count[1])
             .min(limits.max_compute_work_group_count[2]);
+        let (
+            max_task_workgroup_total_count,
+            max_task_workgroups_per_dimension,
+            max_mesh_multiview_count,
+            max_mesh_output_layers,
+        ) = match self.mesh_shader {
+            Some(m) => (
+                m.max_task_work_group_total_count,
+                m.max_task_work_group_count.into_iter().min().unwrap(),
+                m.max_mesh_multiview_view_count,
+                m.max_mesh_output_layers,
+            ),
+            None => (0, 0, 0, 0),
+        };
 
-        // Prevent very large buffers on mesa and most android devices.
+        // Prevent very large buffers on mesa and most android devices, and in all cases
+        // don't risk confusing JS by exceeding the range of a double.
         let is_nvidia = self.properties.vendor_id == crate::auxil::db::nvidia::VENDOR;
         let max_buffer_size =
             if (cfg!(target_os = "linux") || cfg!(target_os = "android")) && !is_nvidia {
                 i32::MAX as u64
             } else {
-                u64::MAX
+                1u64 << 52
             };
 
         let mut max_binding_array_elements = 0;
@@ -1275,6 +1290,12 @@ impl PhysicalDeviceProperties {
             max_compute_workgroups_per_dimension,
             max_buffer_size,
             max_non_sampler_bindings: u32::MAX,
+
+            max_task_workgroup_total_count,
+            max_task_workgroups_per_dimension,
+            max_mesh_multiview_count,
+            max_mesh_output_layers,
+
             max_blas_primitive_count,
             max_blas_geometry_count,
             max_tlas_instance_count,
@@ -1409,7 +1430,7 @@ impl super::InstanceShared {
 
                 if supports_mesh_shader {
                     let next = capabilities
-                        ._mesh_shader
+                        .mesh_shader
                         .insert(vk::PhysicalDeviceMeshShaderPropertiesEXT::default());
                     properties2 = properties2.push_next(next);
                 }
@@ -1702,7 +1723,7 @@ impl super::Instance {
         };
         let queue_flags = queue_families.first()?.queue_flags;
         if !queue_flags.contains(vk::QueueFlags::GRAPHICS) {
-            log::warn!("The first queue only exposes {:?}", queue_flags);
+            log::warn!("The first queue only exposes {queue_flags:?}");
             return None;
         }
 
@@ -1830,10 +1851,10 @@ impl super::Adapter {
             });
 
         if !unsupported_extensions.is_empty() {
-            log::warn!("Missing extensions: {:?}", unsupported_extensions);
+            log::warn!("Missing extensions: {unsupported_extensions:?}");
         }
 
-        log::debug!("Supported extensions: {:?}", supported_extensions);
+        log::debug!("Supported extensions: {supported_extensions:?}");
         supported_extensions
     }
 
@@ -2179,6 +2200,9 @@ impl super::Adapter {
                 self.private_caps.maximum_samplers,
             )),
             memory_allocations_counter: Default::default(),
+
+            texture_identity_factory: super::ResourceIdentityFactory::new(),
+            texture_view_identity_factory: super::ResourceIdentityFactory::new(),
         });
 
         let relay_semaphores = super::RelaySemaphores::new(&shared)?;
@@ -2527,7 +2551,7 @@ impl crate::Adapter for super::Adapter {
                 Ok(true) => (),
                 Ok(false) => return None,
                 Err(e) => {
-                    log::error!("get_physical_device_surface_support: {}", e);
+                    log::error!("get_physical_device_surface_support: {e}");
                     return None;
                 }
             }
@@ -2542,7 +2566,7 @@ impl crate::Adapter for super::Adapter {
             } {
                 Ok(caps) => caps,
                 Err(e) => {
-                    log::error!("get_physical_device_surface_capabilities: {}", e);
+                    log::error!("get_physical_device_surface_capabilities: {e}");
                     return None;
                 }
             }
@@ -2576,7 +2600,7 @@ impl crate::Adapter for super::Adapter {
             } {
                 Ok(present_modes) => present_modes,
                 Err(e) => {
-                    log::error!("get_physical_device_surface_present_modes: {}", e);
+                    log::error!("get_physical_device_surface_present_modes: {e}");
                     // Per definition of `SurfaceCapabilities`, there must be at least one present mode.
                     return None;
                 }
@@ -2592,7 +2616,7 @@ impl crate::Adapter for super::Adapter {
             } {
                 Ok(formats) => formats,
                 Err(e) => {
-                    log::error!("get_physical_device_surface_formats: {}", e);
+                    log::error!("get_physical_device_surface_formats: {e}");
                     // Per definition of `SurfaceCapabilities`, there must be at least one present format.
                     return None;
                 }

From 0389c1b3e60ecb4f87e6172957dfc9cc4cc9ddde Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 20:14:55 -0400
Subject: [PATCH 12/20] Feedback

---
 naga/src/back/spv/block.rs        |  4 ++--
 naga/src/back/spv/f16_polyfill.rs |  9 ++++-----
 naga/src/back/spv/writer.rs       | 12 ++++++------
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index b0b061626f6..a69790380fa 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -237,7 +237,7 @@ impl Writer {
                 }
             };
 
-            if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
+            if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
                 let converted = self.id_gen.next();
                 super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
                     member_value_id,
@@ -2325,7 +2325,7 @@ impl BlockContext<'_> {
             ExpressionPointer::Ready { pointer_id } => {
                 let id = self.gen_id();
 
-                if let Some((f32_ty, _)) =
+                if let Some(f32_ty) =
                     self.writer.io_f16_polyfills.get_polyfill_info(pointer_id)
                 {
                     block
diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
index 9ce6d0fe39f..a982348c83e 100644
--- a/naga/src/back/spv/f16_polyfill.rs
+++ b/naga/src/back/spv/f16_polyfill.rs
@@ -16,7 +16,7 @@ use alloc::vec::Vec;
 #[derive(Default)]
 pub(in crate::back::spv) struct F16IoPolyfill {
     use_native: bool,
-    variable_map: crate::FastHashMap<Word, (Word, Word)>,
+    variable_map: crate::FastHashMap<Word, Word>,
 }
 
 impl F16IoPolyfill {
@@ -42,12 +42,11 @@ impl F16IoPolyfill {
             }
     }
 
-    pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) {
-        self.variable_map
-            .insert(variable_id, (f32_type_id, f16_type_id));
+    pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word) {
+        self.variable_map.insert(variable_id, f32_type_id);
     }
 
-    pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> {
+    pub fn get_polyfill_info(&self, variable_id: Word) -> Option<Word> {
         self.variable_map.get(&variable_id).copied()
     }
 
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index e54cffe19cf..769949b82d0 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -734,7 +734,7 @@ impl Writer {
                     iface.varying_ids.push(varying_id);
                     let mut id = self.id_gen.next();
 
-                    if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
+                    if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
                         prelude
                             .body
                             .push(Instruction::load(f32_ty, id, varying_id, None));
@@ -778,8 +778,8 @@ impl Writer {
                             binding,
                         )?;
                         iface.varying_ids.push(varying_id);
-                        let id = self.id_gen.next();
-                        if let Some((f32_ty, _)) =
+                        let mut id = self.id_gen.next();
+                        if let Some(f32_ty) =
                             self.io_f16_polyfills.get_polyfill_info(varying_id)
                         {
                             prelude
@@ -792,7 +792,8 @@ impl Writer {
                                 converted,
                                 &mut prelude.body,
                             );
-                            constituent_ids.push(converted);
+                            id = converted;
+                            constituent_ids.push(id);
                         } else {
                             prelude
                                 .body
@@ -1960,9 +1961,8 @@ impl Writer {
 
             let f32_type_id = self.get_localtype_id(f32_value_local);
             let ptr_id = self.get_pointer_type_id(f32_type_id, class);
-            let f16_type_id = self.get_handle_type_id(ty);
             self.io_f16_polyfills
-                .register_variable(id, f32_type_id, f16_type_id);
+                .register_variable(id, f32_type_id);
 
             ptr_id
         } else {

From 7642b6425170b87b475acb6a52a37d302e3aa5ae Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 20:18:42 -0400
Subject: [PATCH 13/20] fmt

---
 naga/src/back/spv/block.rs  | 4 +---
 naga/src/back/spv/writer.rs | 7 ++-----
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index a69790380fa..1b20d998c87 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -2325,9 +2325,7 @@ impl BlockContext<'_> {
             ExpressionPointer::Ready { pointer_id } => {
                 let id = self.gen_id();
 
-                if let Some(f32_ty) =
-                    self.writer.io_f16_polyfills.get_polyfill_info(pointer_id)
-                {
+                if let Some(f32_ty) = self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) {
                     block
                         .body
                         .push(Instruction::load(f32_ty, id, pointer_id, None));
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index 769949b82d0..64a81b67a31 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -779,9 +779,7 @@ impl Writer {
                         )?;
                         iface.varying_ids.push(varying_id);
                         let mut id = self.id_gen.next();
-                        if let Some(f32_ty) =
-                            self.io_f16_polyfills.get_polyfill_info(varying_id)
-                        {
+                        if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
                             prelude
                                 .body
                                 .push(Instruction::load(f32_ty, id, varying_id, None));
@@ -1961,8 +1959,7 @@ impl Writer {
 
             let f32_type_id = self.get_localtype_id(f32_value_local);
             let ptr_id = self.get_pointer_type_id(f32_type_id, class);
-            self.io_f16_polyfills
-                .register_variable(id, f32_type_id);
+            self.io_f16_polyfills.register_variable(id, f32_type_id);
 
             ptr_id
         } else {

From 8f13d221dae886f3e3f5a8c3a7e77a8db6c01ee3 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 21:15:32 -0400
Subject: [PATCH 14/20] Cleanup

---
 naga/src/back/spv/block.rs        | 31 ++++------
 naga/src/back/spv/f16_polyfill.rs | 14 ++---
 naga/src/back/spv/writer.rs       | 95 +++++++++++++++++--------------
 3 files changed, 69 insertions(+), 71 deletions(-)

diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index 1b20d998c87..fa6768d3c33 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -237,18 +237,7 @@ impl Writer {
                 }
             };
 
-            if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
-                let converted = self.id_gen.next();
-                super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
-                    member_value_id,
-                    f32_ty,
-                    converted,
-                    body,
-                );
-                body.push(Instruction::store(res_member.id, converted, None));
-            } else {
-                body.push(Instruction::store(res_member.id, member_value_id, None));
-            }
+            self.store_io_with_f16_polyfill(body, res_member.id, member_value_id);
 
             match res_member.built_in {
                 Some(crate::BuiltIn::Position { .. })
@@ -2325,16 +2314,16 @@ impl BlockContext<'_> {
             ExpressionPointer::Ready { pointer_id } => {
                 let id = self.gen_id();
 
-                if let Some(f32_ty) = self.writer.io_f16_polyfills.get_polyfill_info(pointer_id) {
-                    block
-                        .body
-                        .push(Instruction::load(f32_ty, id, pointer_id, None));
-                    let converted = self.gen_id();
-                    super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
-                        id,
-                        result_type_id,
-                        converted,
+                if self
+                    .writer
+                    .io_f16_polyfills
+                    .get_f32_io_type(pointer_id)
+                    .is_some()
+                {
+                    let converted = self.writer.load_io_with_f16_polyfill(
                         &mut block.body,
+                        pointer_id,
+                        result_type_id,
                     );
                     return Ok(converted);
                 }
diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
index a982348c83e..ee391f7f245 100644
--- a/naga/src/back/spv/f16_polyfill.rs
+++ b/naga/src/back/spv/f16_polyfill.rs
@@ -16,14 +16,14 @@ use alloc::vec::Vec;
 #[derive(Default)]
 pub(in crate::back::spv) struct F16IoPolyfill {
     use_native: bool,
-    variable_map: crate::FastHashMap<Word, Word>,
+    io_var_to_f32_type: crate::FastHashMap<Word, Word>,
 }
 
 impl F16IoPolyfill {
     pub fn new(use_storage_input_output_16: bool) -> Self {
         Self {
             use_native: use_storage_input_output_16,
-            variable_map: crate::FastHashMap::default(),
+            io_var_to_f32_type: crate::FastHashMap::default(),
         }
     }
 
@@ -42,12 +42,12 @@ impl F16IoPolyfill {
             }
     }
 
-    pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word) {
-        self.variable_map.insert(variable_id, f32_type_id);
+    pub fn register_io_var(&mut self, variable_id: Word, f32_type_id: Word) {
+        self.io_var_to_f32_type.insert(variable_id, f32_type_id);
     }
 
-    pub fn get_polyfill_info(&self, variable_id: Word) -> Option<Word> {
-        self.variable_map.get(&variable_id).copied()
+    pub fn get_f32_io_type(&self, variable_id: Word) -> Option<Word> {
+        self.io_var_to_f32_type.get(&variable_id).copied()
     }
 
     pub fn emit_f16_to_f32_conversion(
@@ -98,7 +98,7 @@ impl F16IoPolyfill {
 
 impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
     fn recycle(mut self) -> Self {
-        self.variable_map = self.variable_map.recycle();
+        self.io_var_to_f32_type = self.io_var_to_f32_type.recycle();
         self
     }
 }
diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs
index 64a81b67a31..48d13f03c25 100644
--- a/naga/src/back/spv/writer.rs
+++ b/naga/src/back/spv/writer.rs
@@ -732,28 +732,11 @@ impl Writer {
                         binding,
                     )?;
                     iface.varying_ids.push(varying_id);
-                    let mut id = self.id_gen.next();
-
-                    if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
-                        prelude
-                            .body
-                            .push(Instruction::load(f32_ty, id, varying_id, None));
-                        let converted = self.id_gen.next();
-                        super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
-                            id,
-                            argument_type_id,
-                            converted,
-                            &mut prelude.body,
-                        );
-                        id = converted;
-                    } else {
-                        prelude.body.push(Instruction::load(
-                            argument_type_id,
-                            id,
-                            varying_id,
-                            None,
-                        ));
-                    }
+                    let id = self.load_io_with_f16_polyfill(
+                        &mut prelude.body,
+                        varying_id,
+                        argument_type_id,
+                    );
 
                     if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
                         local_invocation_id = Some(id);
@@ -778,26 +761,9 @@ impl Writer {
                             binding,
                         )?;
                         iface.varying_ids.push(varying_id);
-                        let mut id = self.id_gen.next();
-                        if let Some(f32_ty) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
-                            prelude
-                                .body
-                                .push(Instruction::load(f32_ty, id, varying_id, None));
-                            let converted = self.id_gen.next();
-                            super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
-                                id,
-                                type_id,
-                                converted,
-                                &mut prelude.body,
-                            );
-                            id = converted;
-                            constituent_ids.push(id);
-                        } else {
-                            prelude
-                                .body
-                                .push(Instruction::load(type_id, id, varying_id, None));
-                            constituent_ids.push(id);
-                        }
+                        let id =
+                            self.load_io_with_f16_polyfill(&mut prelude.body, varying_id, type_id);
+                        constituent_ids.push(id);
 
                         if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
                             local_invocation_id = Some(id);
@@ -1959,7 +1925,7 @@ impl Writer {
 
             let f32_type_id = self.get_localtype_id(f32_value_local);
             let ptr_id = self.get_pointer_type_id(f32_type_id, class);
-            self.io_f16_polyfills.register_variable(id, f32_type_id);
+            self.io_f16_polyfills.register_io_var(id, f32_type_id);
 
             ptr_id
         } else {
@@ -2172,6 +2138,49 @@ impl Writer {
         Ok(id)
     }
 
+    /// Load an IO variable, converting from `f32` to `f16` if polyfill is active.
+    /// Returns the id of the loaded value matching `target_type_id`.
+    pub(super) fn load_io_with_f16_polyfill(
+        &mut self,
+        body: &mut Vec<Instruction>,
+        varying_id: Word,
+        target_type_id: Word,
+    ) -> Word {
+        let tmp = self.id_gen.next();
+        if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) {
+            body.push(Instruction::load(f32_ty, tmp, varying_id, None));
+            let converted = self.id_gen.next();
+            super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
+                tmp,
+                target_type_id,
+                converted,
+                body,
+            );
+            converted
+        } else {
+            body.push(Instruction::load(target_type_id, tmp, varying_id, None));
+            tmp
+        }
+    }
+
+    /// Store an IO variable, converting from `f16` to `f32` if polyfill is active.
+    pub(super) fn store_io_with_f16_polyfill(
+        &mut self,
+        body: &mut Vec<Instruction>,
+        varying_id: Word,
+        value_id: Word,
+    ) {
+        if let Some(f32_ty) = self.io_f16_polyfills.get_f32_io_type(varying_id) {
+            let converted = self.id_gen.next();
+            super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
+                value_id, f32_ty, converted, body,
+            );
+            body.push(Instruction::store(varying_id, converted, None));
+        } else {
+            body.push(Instruction::store(varying_id, value_id, None));
+        }
+    }
+
     fn write_global_variable(
         &mut self,
         ir_module: &crate::Module,

From 6efd4ba5cc43b4e1dc884e25b253a161d0660ef0 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 21:23:16 -0400
Subject: [PATCH 15/20] Cleanup comments

---
 naga/src/back/spv/mod.rs              | 8 ++++----
 naga/tests/naga/spirv_capabilities.rs | 6 +++---
 wgpu-hal/src/vulkan/adapter.rs        | 2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/naga/src/back/spv/mod.rs b/naga/src/back/spv/mod.rs
index 53bfd2cd845..ab3abe95515 100644
--- a/naga/src/back/spv/mod.rs
+++ b/naga/src/back/spv/mod.rs
@@ -774,8 +774,8 @@ pub struct Writer {
     ray_get_committed_intersection_function: Option<Word>,
     ray_get_candidate_intersection_function: Option<Word>,
 
-    /// F16 I/O polyfill manager for handling f16 input/output variables
-    /// when StorageInputOutput16 capability is not available.
+    /// F16 I/O polyfill manager for handling `f16` input/output variables
+    /// when `StorageInputOutput16` capability is not available.
     io_f16_polyfills: f16_polyfill::F16IoPolyfill,
 }
 
@@ -859,8 +859,8 @@ pub struct Options<'a> {
     /// to think the number of iterations is bounded.
     pub force_loop_bounding: bool,
 
-    /// Whether to use the StorageInputOutput16 capability for f16 shader I/O.
-    /// When false, f16 I/O is polyfilled using f32 types with conversions.
+    /// Whether to use the `StorageInputOutput16` capability for `f16` shader I/O.
+    /// When false, `f16` I/O is polyfilled using `f32` types with conversions.
     pub use_storage_input_output_16: bool,
 
     pub debug_info: Option<DebugInfo<'a>>,
diff --git a/naga/tests/naga/spirv_capabilities.rs b/naga/tests/naga/spirv_capabilities.rs
index aa99298273d..6d0e8153b81 100644
--- a/naga/tests/naga/spirv_capabilities.rs
+++ b/naga/tests/naga/spirv_capabilities.rs
@@ -253,7 +253,7 @@ fn f16_io_capabilities() {
         .unwrap();
     let caps_native = writer_native.get_capabilities_used();
 
-    // Should include StorageInputOutput16 for native f16 I/O
+    // Should include `StorageInputOutput16` for native `f16` I/O
     assert!(caps_native.contains(&Ca::StorageInputOutput16));
 
     // Test polyfill path: use_storage_input_output_16 = false
@@ -269,10 +269,10 @@ fn f16_io_capabilities() {
         .unwrap();
     let caps_polyfill = writer_polyfill.get_capabilities_used();
 
-    // Should not include StorageInputOutput16 when polyfilled
+    // Should not include `StorageInputOutput16` when polyfilled
     assert!(!caps_polyfill.contains(&Ca::StorageInputOutput16));
 
-    // But should still include the basic f16 capabilities
+    // But should still include the basic `f16` capabilities
     assert!(caps_polyfill.contains(&Ca::Float16));
 }
 
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 2ff2a65ca50..35d7aafb3f7 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -396,7 +396,7 @@ impl PhysicalDeviceFeatures {
                 _ => None,
             },
             _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) {
-                // Check if the device actually supports storage_input_output16
+                // Check if the device actually supports `storage_input_output16`
                 let storage_input_output16_supported = phd_features
                     ._16bit_storage
                     .as_ref()

From 7098938a18b234a2abb5d0e621b61c598f6062d0 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 21:59:26 -0400
Subject: [PATCH 16/20] Cleanup capability check

---
 wgpu-hal/src/vulkan/adapter.rs | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index 35d7aafb3f7..ce840590c36 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -198,6 +198,13 @@ impl PhysicalDeviceFeatures {
         info
     }
 
+    fn supports_storage_input_output_16(&self) -> bool {
+        self._16bit_storage
+            .as_ref()
+            .map(|features| features.storage_input_output16 != 0)
+            .unwrap_or(false)
+    }
+
     /// Create a `PhysicalDeviceFeatures` that can be used to create a logical
     /// device.
     ///
@@ -396,17 +403,12 @@ impl PhysicalDeviceFeatures {
                 _ => None,
             },
             _16bit_storage: if requested_features.contains(wgt::Features::SHADER_F16) {
-                // Check if the device actually supports `storage_input_output16`
-                let storage_input_output16_supported = phd_features
-                    ._16bit_storage
-                    .as_ref()
-                    .map(|features| features.storage_input_output16 != 0)
-                    .unwrap_or(false);
-
                 Some(
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
-                        .storage_input_output16(storage_input_output16_supported)
+                        .storage_input_output16(
+                            phd_features.supports_storage_input_output_16(),
+                        )
                         .uniform_and_storage_buffer16_bit_access(true),
                 )
             } else {
@@ -2138,15 +2140,8 @@ impl super::Adapter {
                     spv::ZeroInitializeWorkgroupMemoryMode::Polyfill
                 },
                 force_loop_bounding: true,
-                use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16) && {
-                    // Check if the device actually supports storage_input_output16
-                    let phd_features = self.physical_device_features(enabled_extensions, features);
-                    phd_features
-                        ._16bit_storage
-                        .as_ref()
-                        .map(|storage_features| storage_features.storage_input_output16 != 0)
-                        .unwrap_or(false)
-                },
+                use_storage_input_output_16: features.contains(wgt::Features::SHADER_F16)
+                    && self.phd_features.supports_storage_input_output_16(),
                 // We need to build this separately for each invocation, so just default it out here
                 binding_map: BTreeMap::default(),
                 debug_info: None,

From c2eb34c928b687073fdc8b04c8a13605faad8f93 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 22:07:43 -0400
Subject: [PATCH 17/20] fmt

---
 wgpu-hal/src/vulkan/adapter.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index ce840590c36..a51312a8030 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -406,9 +406,7 @@ impl PhysicalDeviceFeatures {
                 Some(
                     vk::PhysicalDevice16BitStorageFeatures::default()
                         .storage_buffer16_bit_access(true)
-                        .storage_input_output16(
-                            phd_features.supports_storage_input_output_16(),
-                        )
+                        .storage_input_output16(phd_features.supports_storage_input_output_16())
                         .uniform_and_storage_buffer16_bit_access(true),
                 )
             } else {

From bd259cdc9708ca4ffe92b642880d002640c72dca Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Thu, 7 Aug 2025 23:50:21 -0400
Subject: [PATCH 18/20] Changelog

---
 CHANGELOG.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0dd31e77071..60d8796a400 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -61,7 +61,8 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913).
 
 #### Naga
 
-Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950).
+- Naga now requires that no type be larger than 1 GB. This limit may be lowered in the future; feedback on an appropriate value for the limit is welcome. By @andyleiserson in [#7950](https://github.com/gfx-rs/wgpu/pull/7950).
+- Add `f16` I/O polyfill on the Vulkan backend to enable `SHADER_F16` use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884).
 
 ### Bug Fixes
 
@@ -161,7 +162,6 @@ By @Vecvec in [#7829](https://github.com/gfx-rs/wgpu/pull/7829).
 - Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643).
 - Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683).
 - Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658)
-- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884)
 
 ### General
 

From 520f9878f59376309441ddefe4730e0491ddf7b1 Mon Sep 17 00:00:00 2001
From: cryvosh <cryvosh@gmail.com>
Date: Sun, 10 Aug 2025 14:41:14 -0400
Subject: [PATCH 19/20] Add test

---
 naga/src/back/spv/f16_polyfill.rs           |   4 +-
 naga/tests/in/wgsl/f16-native.wgsl          |   8 ++
 naga/tests/in/wgsl/f16-polyfill.wgsl        |   8 ++
 naga/tests/out/spv/wgsl-f16-native.spvasm   | 110 +++++++++++++++++-
 naga/tests/out/spv/wgsl-f16-polyfill.spvasm | 118 +++++++++++++++++++-
 5 files changed, 242 insertions(+), 6 deletions(-)

diff --git a/naga/src/back/spv/f16_polyfill.rs b/naga/src/back/spv/f16_polyfill.rs
index ee391f7f245..824490265af 100644
--- a/naga/src/back/spv/f16_polyfill.rs
+++ b/naga/src/back/spv/f16_polyfill.rs
@@ -1,6 +1,6 @@
 /*!
-This module provides functionality polyfills `f16` input/output variables when the
-`StorageInputOutput16` capability is not available or disabled.
+This module provides functionality for polyfilling `f16` input/output variables
+when the `StorageInputOutput16` capability is not available or disabled.
 
 It works by:
 
diff --git a/naga/tests/in/wgsl/f16-native.wgsl b/naga/tests/in/wgsl/f16-native.wgsl
index 2dea0baaa29..fda726df765 100644
--- a/naga/tests/in/wgsl/f16-native.wgsl
+++ b/naga/tests/in/wgsl/f16-native.wgsl
@@ -69,3 +69,11 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
     input.scalar_f16 = 0.0h;
     return input.scalar_f16;
 }
+
+@fragment
+fn test_component_access(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.vec2_f16.x = input.vec2_f16.y;
+    output.vec2_f16.y = input.vec2_f16.x;
+    return output;
+}
\ No newline at end of file
diff --git a/naga/tests/in/wgsl/f16-polyfill.wgsl b/naga/tests/in/wgsl/f16-polyfill.wgsl
index 2dea0baaa29..fda726df765 100644
--- a/naga/tests/in/wgsl/f16-polyfill.wgsl
+++ b/naga/tests/in/wgsl/f16-polyfill.wgsl
@@ -69,3 +69,11 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
     input.scalar_f16 = 0.0h;
     return input.scalar_f16;
 }
+
+@fragment
+fn test_component_access(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.vec2_f16.x = input.vec2_f16.y;
+    output.vec2_f16.y = input.vec2_f16.x;
+    return output;
+}
\ No newline at end of file
diff --git a/naga/tests/out/spv/wgsl-f16-native.spvasm b/naga/tests/out/spv/wgsl-f16-native.spvasm
index 78f1b0d9b58..43210270933 100644
--- a/naga/tests/out/spv/wgsl-f16-native.spvasm
+++ b/naga/tests/out/spv/wgsl-f16-native.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 273
+; Bound: 318
 OpCapability Shader
 OpCapability Float16
 OpCapability StorageBuffer16BitAccess
@@ -14,10 +14,12 @@ OpEntryPoint Fragment %54 "test_direct" %14 %17 %20 %23 %26 %29 %32 %35 %38 %40
 OpEntryPoint Fragment %136 "test_struct" %112 %114 %116 %118 %120 %122 %124 %126 %128 %129 %130 %131 %132 %133 %134 %135
 OpEntryPoint Fragment %199 "test_copy_input" %175 %177 %179 %181 %183 %185 %187 %189 %191 %192 %193 %194 %195 %196 %197 %198
 OpEntryPoint Fragment %265 "test_return_partial" %248 %250 %252 %254 %256 %258 %260 %262 %264
+OpEntryPoint Fragment %299 "test_component_access" %275 %277 %279 %281 %283 %285 %287 %289 %291 %292 %293 %294 %295 %296 %297 %298
 OpExecutionMode %54 OriginUpperLeft
 OpExecutionMode %136 OriginUpperLeft
 OpExecutionMode %199 OriginUpperLeft
 OpExecutionMode %265 OriginUpperLeft
+OpExecutionMode %299 OriginUpperLeft
 %3 = OpString "f16-native.wgsl"
 OpSource Unknown 0 %3 "enable f16;
 
@@ -90,7 +92,14 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
     input.scalar_f16 = 0.0h;
     return input.scalar_f16;
 }
-"
+
+@fragment
+fn test_component_access(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.vec2_f16.x = input.vec2_f16.y;
+    output.vec2_f16.y = input.vec2_f16.x;
+    return output;
+}"
 OpMemberName %12 0 "scalar_f16"
 OpMemberName %12 1 "scalar_f32"
 OpMemberName %12 2 "vec2_f16"
@@ -165,6 +174,24 @@ OpName %260 "vec4_f16"
 OpName %262 "vec4_f32"
 OpName %265 "test_return_partial"
 OpName %267 "input"
+OpName %275 "scalar_f16"
+OpName %277 "scalar_f32"
+OpName %279 "vec2_f16"
+OpName %281 "vec2_f32"
+OpName %283 "vec3_f16"
+OpName %285 "vec3_f32"
+OpName %287 "vec4_f16"
+OpName %289 "vec4_f32"
+OpName %291 "scalar_f16"
+OpName %292 "scalar_f32"
+OpName %293 "vec2_f16"
+OpName %294 "vec2_f32"
+OpName %295 "vec3_f16"
+OpName %296 "vec3_f32"
+OpName %297 "vec4_f16"
+OpName %298 "vec4_f32"
+OpName %299 "test_component_access"
+OpName %300 "output"
 OpMemberDecorate %12 0 Offset 0
 OpMemberDecorate %12 1 Offset 4
 OpMemberDecorate %12 2 Offset 8
@@ -230,6 +257,22 @@ OpDecorate %258 Location 5
 OpDecorate %260 Location 6
 OpDecorate %262 Location 7
 OpDecorate %264 Location 0
+OpDecorate %275 Location 0
+OpDecorate %277 Location 1
+OpDecorate %279 Location 2
+OpDecorate %281 Location 3
+OpDecorate %283 Location 4
+OpDecorate %285 Location 5
+OpDecorate %287 Location 6
+OpDecorate %289 Location 7
+OpDecorate %291 Location 0
+OpDecorate %292 Location 1
+OpDecorate %293 Location 2
+OpDecorate %294 Location 3
+OpDecorate %295 Location 4
+OpDecorate %296 Location 5
+OpDecorate %297 Location 6
+OpDecorate %298 Location 7
 %2 = OpTypeVoid
 %4 = OpTypeFloat 16
 %5 = OpTypeFloat 32
@@ -346,6 +389,23 @@ OpDecorate %264 Location 0
 %264 = OpVariable  %39  Output
 %266 = OpConstant  %4  0
 %268 = OpConstantNull  %12
+%275 = OpVariable  %15  Input
+%277 = OpVariable  %18  Input
+%279 = OpVariable  %21  Input
+%281 = OpVariable  %24  Input
+%283 = OpVariable  %27  Input
+%285 = OpVariable  %30  Input
+%287 = OpVariable  %33  Input
+%289 = OpVariable  %36  Input
+%291 = OpVariable  %39  Output
+%292 = OpVariable  %41  Output
+%293 = OpVariable  %43  Output
+%294 = OpVariable  %45  Output
+%295 = OpVariable  %47  Output
+%296 = OpVariable  %49  Output
+%297 = OpVariable  %51  Output
+%298 = OpVariable  %53  Output
+%301 = OpConstantNull  %12
 %54 = OpFunction  %2  None %55
 %13 = OpLabel
 %64 = OpVariable  %65  Function %66
@@ -652,4 +712,50 @@ OpLine %3 70 12
 %272 = OpLoad  %4  %271
 OpStore %264 %272
 OpReturn
+OpFunctionEnd
+%299 = OpFunction  %2  None %55
+%273 = OpLabel
+%300 = OpVariable  %65  Function %301
+%276 = OpLoad  %4  %275
+%278 = OpLoad  %5  %277
+%280 = OpLoad  %6  %279
+%282 = OpLoad  %7  %281
+%284 = OpLoad  %8  %283
+%286 = OpLoad  %9  %285
+%288 = OpLoad  %10  %287
+%290 = OpLoad  %11  %289
+%274 = OpCompositeConstruct  %12  %276 %278 %280 %282 %284 %286 %288 %290
+OpBranch %302
+%302 = OpLabel
+OpLine %3 76 5
+%303 = OpCompositeExtract  %6  %274 2
+%304 = OpCompositeExtract  %4  %303 1
+OpLine %3 76 5
+%305 = OpAccessChain  %68  %300 %79 %70
+OpStore %305 %304
+OpLine %3 77 5
+%306 = OpCompositeExtract  %6  %274 2
+%307 = OpCompositeExtract  %4  %306 0
+OpLine %3 77 5
+%308 = OpAccessChain  %68  %300 %79 %75
+OpStore %308 %307
+OpLine %3 1 1
+%309 = OpLoad  %12  %300
+%310 = OpCompositeExtract  %4  %309 0
+OpStore %291 %310
+%311 = OpCompositeExtract  %5  %309 1
+OpStore %292 %311
+%312 = OpCompositeExtract  %6  %309 2
+OpStore %293 %312
+%313 = OpCompositeExtract  %7  %309 3
+OpStore %294 %313
+%314 = OpCompositeExtract  %8  %309 4
+OpStore %295 %314
+%315 = OpCompositeExtract  %9  %309 5
+OpStore %296 %315
+%316 = OpCompositeExtract  %10  %309 6
+OpStore %297 %316
+%317 = OpCompositeExtract  %11  %309 7
+OpStore %298 %317
+OpReturn
 OpFunctionEnd
\ No newline at end of file
diff --git a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
index 8b6b111750f..d673816a486 100644
--- a/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
+++ b/naga/tests/out/spv/wgsl-f16-polyfill.spvasm
@@ -1,7 +1,7 @@
 ; SPIR-V
 ; Version: 1.1
 ; Generator: rspirv
-; Bound: 294
+; Bound: 347
 OpCapability Shader
 OpCapability Float16
 OpCapability StorageBuffer16BitAccess
@@ -13,10 +13,12 @@ OpEntryPoint Fragment %50 "test_direct" %14 %18 %20 %24 %26 %30 %32 %36 %38 %40
 OpEntryPoint Fragment %140 "test_struct" %112 %115 %117 %120 %122 %125 %127 %130 %132 %133 %134 %135 %136 %137 %138 %139
 OpEntryPoint Fragment %211 "test_copy_input" %183 %186 %188 %191 %193 %196 %198 %201 %203 %204 %205 %206 %207 %208 %209 %210
 OpEntryPoint Fragment %285 "test_return_partial" %264 %267 %269 %272 %274 %277 %279 %282 %284
+OpEntryPoint Fragment %324 "test_component_access" %296 %299 %301 %304 %306 %309 %311 %314 %316 %317 %318 %319 %320 %321 %322 %323
 OpExecutionMode %50 OriginUpperLeft
 OpExecutionMode %140 OriginUpperLeft
 OpExecutionMode %211 OriginUpperLeft
 OpExecutionMode %285 OriginUpperLeft
+OpExecutionMode %324 OriginUpperLeft
 %3 = OpString "f16-polyfill.wgsl"
 OpSource Unknown 0 %3 "enable f16;
 
@@ -89,7 +91,14 @@ fn test_return_partial(input_original: F16IO) -> @location(0) f16 {
     input.scalar_f16 = 0.0h;
     return input.scalar_f16;
 }
-"
+
+@fragment
+fn test_component_access(input: F16IO) -> F16IO {
+    var output: F16IO;
+    output.vec2_f16.x = input.vec2_f16.y;
+    output.vec2_f16.y = input.vec2_f16.x;
+    return output;
+}"
 OpMemberName %12 0 "scalar_f16"
 OpMemberName %12 1 "scalar_f32"
 OpMemberName %12 2 "vec2_f16"
@@ -164,6 +173,24 @@ OpName %279 "vec4_f16"
 OpName %282 "vec4_f32"
 OpName %285 "test_return_partial"
 OpName %287 "input"
+OpName %296 "scalar_f16"
+OpName %299 "scalar_f32"
+OpName %301 "vec2_f16"
+OpName %304 "vec2_f32"
+OpName %306 "vec3_f16"
+OpName %309 "vec3_f32"
+OpName %311 "vec4_f16"
+OpName %314 "vec4_f32"
+OpName %316 "scalar_f16"
+OpName %317 "scalar_f32"
+OpName %318 "vec2_f16"
+OpName %319 "vec2_f32"
+OpName %320 "vec3_f16"
+OpName %321 "vec3_f32"
+OpName %322 "vec4_f16"
+OpName %323 "vec4_f32"
+OpName %324 "test_component_access"
+OpName %325 "output"
 OpMemberDecorate %12 0 Offset 0
 OpMemberDecorate %12 1 Offset 4
 OpMemberDecorate %12 2 Offset 8
@@ -229,6 +256,22 @@ OpDecorate %277 Location 5
 OpDecorate %279 Location 6
 OpDecorate %282 Location 7
 OpDecorate %284 Location 0
+OpDecorate %296 Location 0
+OpDecorate %299 Location 1
+OpDecorate %301 Location 2
+OpDecorate %304 Location 3
+OpDecorate %306 Location 4
+OpDecorate %309 Location 5
+OpDecorate %311 Location 6
+OpDecorate %314 Location 7
+OpDecorate %316 Location 0
+OpDecorate %317 Location 1
+OpDecorate %318 Location 2
+OpDecorate %319 Location 3
+OpDecorate %320 Location 4
+OpDecorate %321 Location 5
+OpDecorate %322 Location 6
+OpDecorate %323 Location 7
 %2 = OpTypeVoid
 %4 = OpTypeFloat 16
 %5 = OpTypeFloat 32
@@ -337,6 +380,23 @@ OpDecorate %284 Location 0
 %284 = OpVariable  %39  Output
 %286 = OpConstant  %4  0
 %288 = OpConstantNull  %12
+%296 = OpVariable  %15  Input
+%299 = OpVariable  %15  Input
+%301 = OpVariable  %21  Input
+%304 = OpVariable  %21  Input
+%306 = OpVariable  %27  Input
+%309 = OpVariable  %27  Input
+%311 = OpVariable  %33  Input
+%314 = OpVariable  %33  Input
+%316 = OpVariable  %39  Output
+%317 = OpVariable  %39  Output
+%318 = OpVariable  %42  Output
+%319 = OpVariable  %42  Output
+%320 = OpVariable  %45  Output
+%321 = OpVariable  %45  Output
+%322 = OpVariable  %48  Output
+%323 = OpVariable  %48  Output
+%326 = OpConstantNull  %12
 %50 = OpFunction  %2  None %51
 %13 = OpLabel
 %60 = OpVariable  %61  Function %62
@@ -672,4 +732,58 @@ OpLine %3 70 12
 %293 = OpFConvert  %5  %292
 OpStore %284 %293
 OpReturn
+OpFunctionEnd
+%324 = OpFunction  %2  None %51
+%294 = OpLabel
+%325 = OpVariable  %61  Function %326
+%297 = OpLoad  %5  %296
+%298 = OpFConvert  %4  %297
+%300 = OpLoad  %5  %299
+%302 = OpLoad  %7  %301
+%303 = OpFConvert  %6  %302
+%305 = OpLoad  %7  %304
+%307 = OpLoad  %9  %306
+%308 = OpFConvert  %8  %307
+%310 = OpLoad  %9  %309
+%312 = OpLoad  %11  %311
+%313 = OpFConvert  %10  %312
+%315 = OpLoad  %11  %314
+%295 = OpCompositeConstruct  %12  %298 %300 %303 %305 %308 %310 %313 %315
+OpBranch %327
+%327 = OpLabel
+OpLine %3 76 5
+%328 = OpCompositeExtract  %6  %295 2
+%329 = OpCompositeExtract  %4  %328 1
+OpLine %3 76 5
+%330 = OpAccessChain  %64  %325 %75 %66
+OpStore %330 %329
+OpLine %3 77 5
+%331 = OpCompositeExtract  %6  %295 2
+%332 = OpCompositeExtract  %4  %331 0
+OpLine %3 77 5
+%333 = OpAccessChain  %64  %325 %75 %71
+OpStore %333 %332
+OpLine %3 1 1
+%334 = OpLoad  %12  %325
+%335 = OpCompositeExtract  %4  %334 0
+%336 = OpFConvert  %5  %335
+OpStore %316 %336
+%337 = OpCompositeExtract  %5  %334 1
+OpStore %317 %337
+%338 = OpCompositeExtract  %6  %334 2
+%339 = OpFConvert  %7  %338
+OpStore %318 %339
+%340 = OpCompositeExtract  %7  %334 3
+OpStore %319 %340
+%341 = OpCompositeExtract  %8  %334 4
+%342 = OpFConvert  %9  %341
+OpStore %320 %342
+%343 = OpCompositeExtract  %9  %334 5
+OpStore %321 %343
+%344 = OpCompositeExtract  %10  %334 6
+%345 = OpFConvert  %11  %344
+OpStore %322 %345
+%346 = OpCompositeExtract  %11  %334 7
+OpStore %323 %346
+OpReturn
 OpFunctionEnd
\ No newline at end of file

From 00a72d31dde191da3f4a78c04cbb97a030be8b7e Mon Sep 17 00:00:00 2001
From: Teodor Tanasoaia <28601907+teoxoy@users.noreply.github.com>
Date: Thu, 21 Aug 2025 10:32:26 +0200
Subject: [PATCH 20/20] Remove unneeded change

---
 naga/src/back/spv/block.rs | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs
index fa6768d3c33..6f4e5d12574 100644
--- a/naga/src/back/spv/block.rs
+++ b/naga/src/back/spv/block.rs
@@ -2313,21 +2313,6 @@ impl BlockContext<'_> {
         match self.write_access_chain(pointer, block, access_type_adjustment)? {
             ExpressionPointer::Ready { pointer_id } => {
                 let id = self.gen_id();
-
-                if self
-                    .writer
-                    .io_f16_polyfills
-                    .get_f32_io_type(pointer_id)
-                    .is_some()
-                {
-                    let converted = self.writer.load_io_with_f16_polyfill(
-                        &mut block.body,
-                        pointer_id,
-                        result_type_id,
-                    );
-                    return Ok(converted);
-                }
-
                 let atomic_space =
                     match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) {
                         crate::TypeInner::Pointer { base, space } => {