Skip to content

[naga spv-out] Add f16 io polyfill #7884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: trunk
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ By @Vecvec in [#7829](https://github.com/gfx-rs/wgpu/pull/7829).
- Diagnostic rendering methods (i.e., `naga::{front::wgsl::ParseError,WithSpan}::emit_error_to_string_with_path`) now accept more types for their `path` argument via a new sealed `AsDiagnosticFilePath` trait. By @atlv24, @bushrat011899, and @ErichDonGubler in [#7643](https://github.com/gfx-rs/wgpu/pull/7643).
- Add support for [quad operations](https://www.w3.org/TR/WGSL/#quad-builtin-functions) (requires `SUBGROUP` feature to be enabled). By @dzamkov and @valaphee in [#7683](https://github.com/gfx-rs/wgpu/pull/7683).
- Add support for `atomicCompareExchangeWeak` in HLSL and GLSL backends. By @cryvosh in [#7658](https://github.com/gfx-rs/wgpu/pull/7658)
- Add f16 IO polyfill on Vulkan backend to enable SHADER_F16 use without requiring `storageInputOutput16`. By @cryvosh in [#7884](https://github.com/gfx-rs/wgpu/pull/7884)

### General

Expand Down
30 changes: 29 additions & 1 deletion naga/src/back/spv/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,18 @@ impl Writer {
}
};

body.push(Instruction::store(res_member.id, member_value_id, None));
if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(res_member.id) {
let converted = self.id_gen.next();
super::f16_polyfill::F16IoPolyfill::emit_f16_to_f32_conversion(
member_value_id,
f32_ty,
converted,
body,
);
body.push(Instruction::store(res_member.id, converted, None));
} else {
body.push(Instruction::store(res_member.id, member_value_id, None));
}

match res_member.built_in {
Some(crate::BuiltIn::Position { .. })
Expand Down Expand Up @@ -2313,6 +2324,23 @@ impl BlockContext<'_> {
match self.write_access_chain(pointer, block, access_type_adjustment)? {
ExpressionPointer::Ready { pointer_id } => {
let id = self.gen_id();

if let Some((f32_ty, _)) =
self.writer.io_f16_polyfills.get_polyfill_info(pointer_id)
{
block
.body
.push(Instruction::load(f32_ty, id, pointer_id, None));
let converted = self.gen_id();
super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
id,
result_type_id,
converted,
&mut block.body,
);
return Ok(converted);
}

let atomic_space =
match *self.fun_info[pointer].ty.inner_with(&self.ir_module.types) {
crate::TypeInner::Pointer { base, space } => {
Expand Down
105 changes: 105 additions & 0 deletions naga/src/back/spv/f16_polyfill.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*!
This module polyfills `f16` input/output variables when the
`StorageInputOutput16` capability is not available or disabled.

It works by:

1. Declaring `f16` I/O variables as `f32` in SPIR-V
2. Converting between `f16` and `f32` at runtime using `OpFConvert`
3. Maintaining mappings to track which variables need conversion
*/

use crate::back::spv::{Instruction, LocalType, NumericType, Word};
use alloc::vec::Vec;

/// Manages `f16` I/O polyfill state and operations.
///
/// Holds the native-vs-polyfill switch and the per-variable type records
/// written by `register_variable` and read back by `get_polyfill_info`.
/// The derived `Default` leaves `use_native` as `false` and the map empty.
#[derive(Default)]
pub(in crate::back::spv) struct F16IoPolyfill {
/// Copied from the `use_storage_input_output_16` argument of `new`.
/// When `true`, `needs_polyfill` returns `false` for every type.
use_native: bool,
/// Maps a registered variable id to its `(f32_type_id, f16_type_id)` pair,
/// in the argument order of `register_variable`.
variable_map: crate::FastHashMap<Word, (Word, Word)>,
}

impl F16IoPolyfill {
pub fn new(use_storage_input_output_16: bool) -> Self {
Self {
use_native: use_storage_input_output_16,
variable_map: crate::FastHashMap::default(),
}
}

/// Reports whether a value of type `ty_inner` must go through the polyfill.
///
/// Returns `true` only when native `f16` I/O is not in use and `ty_inner` is
/// a scalar or vector whose scalar is a float of width 2. Every other type
/// yields `false`.
pub fn needs_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
    use crate::{ScalarKind as Sk, TypeInner};

    // Native 16-bit I/O: nothing ever needs to be polyfilled.
    if self.use_native {
        return false;
    }

    // Scalars and vectors share the same element check; anything else
    // is never polyfilled.
    let element = match *ty_inner {
        TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => scalar,
        _ => return false,
    };

    element.kind == Sk::Float && element.width == 2
}

/// Records that `variable_id` is polyfilled.
///
/// Stores the pair `(f32_type_id, f16_type_id)` under `variable_id` so that
/// `get_polyfill_info` can return it later.
pub fn register_variable(&mut self, variable_id: Word, f32_type_id: Word, f16_type_id: Word) {
    let type_pair = (f32_type_id, f16_type_id);
    self.variable_map.insert(variable_id, type_pair);
}
Comment on lines +45 to +48
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: I noticed that f16_type_id is never used when recalled via get_polyfill_info. Is this a bug, or just some data that can be eliminated?


/// Looks up the type ids recorded for `variable_id`.
///
/// Returns `Some((f32_type_id, f16_type_id))` as stored by
/// `register_variable`, or `None` when there is no entry for `variable_id`.
pub fn get_polyfill_info(&self, variable_id: Word) -> Option<(Word, Word)> {
    let &(f32_type_id, f16_type_id) = self.variable_map.get(&variable_id)?;
    Some((f32_type_id, f16_type_id))
}

/// Appends one `OpFConvert` instruction to `body` for the `f16` to `f32`
/// direction.
///
/// The instruction is built from `f32_type_id`, `converted_id` and
/// `f16_value_id`, in that order after the opcode. `converted_id` is supplied
/// by the caller; this function does not generate ids.
pub fn emit_f16_to_f32_conversion(
    f16_value_id: Word,
    f32_type_id: Word,
    converted_id: Word,
    body: &mut Vec<Instruction>,
) {
    let widen = Instruction::unary(
        spirv::Op::FConvert,
        f32_type_id,
        converted_id,
        f16_value_id,
    );
    body.push(widen);
}

/// Appends one `OpFConvert` instruction to `body` for the `f32` to `f16`
/// direction.
///
/// The instruction is built from `f16_type_id`, `converted_id` and
/// `f32_value_id`, in that order after the opcode. `converted_id` is supplied
/// by the caller; this function does not generate ids.
pub fn emit_f32_to_f16_conversion(
    f32_value_id: Word,
    f16_type_id: Word,
    converted_id: Word,
    body: &mut Vec<Instruction>,
) {
    let narrow = Instruction::unary(
        spirv::Op::FConvert,
        f16_type_id,
        converted_id,
        f32_value_id,
    );
    body.push(narrow);
}

pub fn create_polyfill_type(ty_inner: &crate::TypeInner) -> Option<LocalType> {
use crate::{ScalarKind as Sk, TypeInner};

match *ty_inner {
TypeInner::Scalar(ref s) if s.kind == Sk::Float && s.width == 2 => {
Some(LocalType::Numeric(NumericType::Scalar(crate::Scalar::F32)))
}
TypeInner::Vector { size, scalar } if scalar.kind == Sk::Float && scalar.width == 2 => {
Some(LocalType::Numeric(NumericType::Vector {
size,
scalar: crate::Scalar::F32,
}))
}
_ => None,
}
}
}

impl crate::back::spv::recyclable::Recyclable for F16IoPolyfill {
    /// Returns the polyfill state with its variable map recycled.
    ///
    /// `use_native` is carried over unchanged; only `variable_map` is passed
    /// through its own `recycle`.
    fn recycle(self) -> Self {
        let Self {
            use_native,
            variable_map,
        } = self;

        Self {
            use_native,
            variable_map: variable_map.recycle(),
        }
    }
}
11 changes: 11 additions & 0 deletions naga/src/back/spv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Backend for [SPIR-V][spv] (Standard Portable Intermediate Representation).
*/

mod block;
mod f16_polyfill;
mod helpers;
mod image;
mod index;
Expand Down Expand Up @@ -744,6 +745,7 @@ pub struct Writer {
bounds_check_policies: BoundsCheckPolicies,
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode,
force_loop_bounding: bool,
use_storage_input_output_16: bool,
void_type: Word,
//TODO: convert most of these into vectors, addressable by handle indices
lookup_type: crate::FastHashMap<LookupType, Word>,
Expand All @@ -770,6 +772,10 @@ pub struct Writer {

ray_get_committed_intersection_function: Option<Word>,
ray_get_candidate_intersection_function: Option<Word>,

/// F16 I/O polyfill manager for handling f16 input/output variables
/// when StorageInputOutput16 capability is not available.
io_f16_polyfills: f16_polyfill::F16IoPolyfill,
}

bitflags::bitflags! {
Expand Down Expand Up @@ -852,6 +858,10 @@ pub struct Options<'a> {
/// to think the number of iterations is bounded.
pub force_loop_bounding: bool,

/// Whether to use the StorageInputOutput16 capability for f16 shader I/O.
/// When false, f16 I/O is polyfilled using f32 types with conversions.
pub use_storage_input_output_16: bool,

pub debug_info: Option<DebugInfo<'a>>,
}

Expand All @@ -871,6 +881,7 @@ impl Default for Options<'_> {
bounds_check_policies: BoundsCheckPolicies::default(),
zero_initialize_workgroup_memory: ZeroInitializeWorkgroupMemoryMode::Polyfill,
force_loop_bounding: true,
use_storage_input_output_16: true,
debug_info: None,
}
}
Expand Down
93 changes: 80 additions & 13 deletions naga/src/back/spv/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ impl Writer {
bounds_check_policies: options.bounds_check_policies,
zero_initialize_workgroup_memory: options.zero_initialize_workgroup_memory,
force_loop_bounding: options.force_loop_bounding,
use_storage_input_output_16: options.use_storage_input_output_16,
void_type,
lookup_type: crate::FastHashMap::default(),
lookup_function: crate::FastHashMap::default(),
Expand All @@ -92,6 +93,9 @@ impl Writer {
temp_list: Vec::new(),
ray_get_committed_intersection_function: None,
ray_get_candidate_intersection_function: None,
io_f16_polyfills: super::f16_polyfill::F16IoPolyfill::new(
options.use_storage_input_output_16,
),
})
}

Expand Down Expand Up @@ -125,6 +129,7 @@ impl Writer {
bounds_check_policies: self.bounds_check_policies,
zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
force_loop_bounding: self.force_loop_bounding,
use_storage_input_output_16: self.use_storage_input_output_16,
capabilities_available: take(&mut self.capabilities_available),
binding_map: take(&mut self.binding_map),

Expand All @@ -151,6 +156,7 @@ impl Writer {
temp_list: take(&mut self.temp_list).recycle(),
ray_get_candidate_intersection_function: None,
ray_get_committed_intersection_function: None,
io_f16_polyfills: take(&mut self.io_f16_polyfills).recycle(),
};

*self = fresh;
Expand Down Expand Up @@ -726,10 +732,28 @@ impl Writer {
binding,
)?;
iface.varying_ids.push(varying_id);
let id = self.id_gen.next();
prelude
.body
.push(Instruction::load(argument_type_id, id, varying_id, None));
let mut id = self.id_gen.next();

if let Some((f32_ty, _)) = self.io_f16_polyfills.get_polyfill_info(varying_id) {
prelude
.body
.push(Instruction::load(f32_ty, id, varying_id, None));
let converted = self.id_gen.next();
super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
id,
argument_type_id,
converted,
&mut prelude.body,
);
id = converted;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: You have an id = converted; statement here, but not in the other branch where we handle a Struct case as fallback. Should that be there, too?

} else {
prelude.body.push(Instruction::load(
argument_type_id,
id,
varying_id,
None,
));
}

if binding == &crate::Binding::BuiltIn(crate::BuiltIn::LocalInvocationId) {
local_invocation_id = Some(id);
Expand All @@ -755,10 +779,26 @@ impl Writer {
)?;
iface.varying_ids.push(varying_id);
let id = self.id_gen.next();
prelude
.body
.push(Instruction::load(type_id, id, varying_id, None));
constituent_ids.push(id);
if let Some((f32_ty, _)) =
self.io_f16_polyfills.get_polyfill_info(varying_id)
{
prelude
.body
.push(Instruction::load(f32_ty, id, varying_id, None));
let converted = self.id_gen.next();
super::f16_polyfill::F16IoPolyfill::emit_f32_to_f16_conversion(
id,
type_id,
converted,
&mut prelude.body,
);
constituent_ids.push(converted);
} else {
prelude
.body
.push(Instruction::load(type_id, id, varying_id, None));
constituent_ids.push(id);
}

if binding == &crate::Binding::BuiltIn(crate::BuiltIn::GlobalInvocationId) {
local_invocation_id = Some(id);
Expand Down Expand Up @@ -1220,8 +1260,10 @@ impl Writer {
.insert(spirv::Capability::StorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::UniformAndStorageBuffer16BitAccess);
self.capabilities_used
.insert(spirv::Capability::StorageInputOutput16);
if self.use_storage_input_output_16 {
self.capabilities_used
.insert(spirv::Capability::StorageInputOutput16);
}
}
Instruction::type_float(id, bits)
}
Expand Down Expand Up @@ -1904,8 +1946,28 @@ impl Writer {
ty: Handle<crate::Type>,
binding: &crate::Binding,
) -> Result<Word, Error> {
use crate::TypeInner;

let id = self.id_gen.next();
let pointer_type_id = self.get_handle_pointer_type_id(ty, class);
let ty_inner = &ir_module.types[ty].inner;
let needs_polyfill = self.needs_f16_polyfill(ty_inner);

let pointer_type_id = if needs_polyfill {
let f32_value_local =
super::f16_polyfill::F16IoPolyfill::create_polyfill_type(ty_inner)
.expect("needs_polyfill returned true but create_polyfill_type returned None");

let f32_type_id = self.get_localtype_id(f32_value_local);
let ptr_id = self.get_pointer_type_id(f32_type_id, class);
let f16_type_id = self.get_handle_type_id(ty);
self.io_f16_polyfills
.register_variable(id, f32_type_id, f16_type_id);

ptr_id
} else {
self.get_handle_pointer_type_id(ty, class)
};

Instruction::variable(pointer_type_id, id, class, None)
.to_words(&mut self.logical_layout.declarations);

Expand Down Expand Up @@ -2088,8 +2150,9 @@ impl Writer {
// > shader, must be decorated Flat
if class == spirv::StorageClass::Input && stage == crate::ShaderStage::Fragment {
let is_flat = match ir_module.types[ty].inner {
crate::TypeInner::Scalar(scalar)
| crate::TypeInner::Vector { scalar, .. } => match scalar.kind {
TypeInner::Scalar(scalar) | TypeInner::Vector { scalar, .. } => match scalar
.kind
{
Sk::Uint | Sk::Sint | Sk::Bool => true,
Sk::Float => false,
Sk::AbstractInt | Sk::AbstractFloat => {
Expand Down Expand Up @@ -2584,6 +2647,10 @@ impl Writer {
self.decorate(id, spirv::Decoration::NonUniform, &[]);
Ok(())
}

/// Reports whether a value of type `ty_inner` needs the `f16` I/O polyfill.
///
/// Delegates directly to `needs_polyfill` on this writer's `io_f16_polyfills`.
pub(super) fn needs_f16_polyfill(&self, ty_inner: &crate::TypeInner) -> bool {
self.io_f16_polyfills.needs_polyfill(ty_inner)
}
}

#[test]
Expand Down
13 changes: 13 additions & 0 deletions naga/tests/in/wgsl/f16-native.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
targets = "SPIRV"
god_mode = true

[spv]
debug = true
version = [1, 1]
use_storage_input_output_16 = true
capabilities = ["Float16"]

[bounds_check_policies]
index = "ReadZeroSkipWrite"
buffer = "ReadZeroSkipWrite"
image = "ReadZeroSkipWrite"
Loading