diff --git a/CHANGELOG.md b/CHANGELOG.md index 5165006631a..2d44fd5847e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ Bottom level categories: #### Validation - Add clip distances validation for `maxInterStageShaderVariables`. By @ErichDonGubler in [8762](https://github.com/gfx-rs/wgpu/pull/8762). This may break some existing programs, but it compiles with the WebGPU spec. +- Bring immediates in line with webgpu spec. By @atlv24 in [#9280](https://github.com/gfx-rs/wgpu/pull/9280). ### Bug Fixes diff --git a/examples/features/src/ray_shadows/mod.rs b/examples/features/src/ray_shadows/mod.rs index da7bd8a47ef..68203043ce5 100644 --- a/examples/features/src/ray_shadows/mod.rs +++ b/examples/features/src/ray_shadows/mod.rs @@ -94,7 +94,7 @@ impl crate::framework::Example for Example { fn required_limits() -> wgpu::Limits { wgpu::Limits { - max_immediate_size: 16, + max_immediate_size: 12, ..wgpu::Limits::default() } .using_minimum_supported_acceleration_structure_values() @@ -186,7 +186,7 @@ impl crate::framework::Example for Example { let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[Some(&bind_group_layout)], - immediate_size: 16, + immediate_size: 12, }); let pipeline = device.create_render_pipeline(&wgpu::RenderPipelineDescriptor { diff --git a/examples/features/src/ray_shadows/shader.wgsl b/examples/features/src/ray_shadows/shader.wgsl index 0bb430b75c3..7cb8faa44a2 100644 --- a/examples/features/src/ray_shadows/shader.wgsl +++ b/examples/features/src/ray_shadows/shader.wgsl @@ -37,7 +37,6 @@ var acc_struct: acceleration_structure; struct ImmediateData { light: vec3, - padding: f32, } var pc: ImmediateData; diff --git a/tests/tests/wgpu-validation/api/immediates.rs b/tests/tests/wgpu-validation/api/immediates.rs new file mode 100644 index 00000000000..eddc6f75ac0 --- /dev/null +++ b/tests/tests/wgpu-validation/api/immediates.rs @@ -0,0 +1,360 @@ +//! Validation tests for `var` + +use wgpu_test::fail; + +const COMPUTE_SHADER: &str = " + var im: vec4; + + @group(0) @binding(0) + var output: vec4; + + @compute @workgroup_size(1) + fn main() { + output = im; + } +"; + +fn setup_compute() -> ( + wgpu::Device, + wgpu::Queue, + wgpu::ComputePipeline, + wgpu::BindGroup, +) { + let (device, queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor { + required_features: wgpu::Features::IMMEDIATES, + required_limits: wgpu::Limits { + max_immediate_size: 64, + ..Default::default() + }, + ..Default::default() + }); + + let sm = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(COMPUTE_SHADER.into()), + }); + + let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[Some(&bgl)], + immediate_size: 16, + }); + + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&layout), + module: &sm, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 16, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: buffer.as_entire_binding(), + }], + }); + + (device, queue, pipeline, bind_group) +} + +#[test] +fn dispatch_without_setting_immediates_fails() { + let (device, _queue, pipeline, bind_group) = setup_compute(); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.dispatch_workgroups(1, 1, 1); + } + fail(&device, || encoder.finish(), Some("immediate data slots")); +} + +#[test] +fn dispatch_with_partial_immediates_fails() { + let (device, _queue, pipeline, bind_group) = setup_compute(); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.set_immediates(0, &[0u8; 8]); + pass.dispatch_workgroups(1, 1, 1); + } + fail(&device, || encoder.finish(), Some("immediate data slots")); +} + +#[test] +fn dispatch_with_all_immediates_set_succeeds() { + let (device, _queue, pipeline, bind_group) = setup_compute(); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.set_immediates(0, &[0u8; 16]); + pass.dispatch_workgroups(1, 1, 1); + } + wgpu_test::valid(&device, || encoder.finish()); +} + +#[test] +fn dispatch_with_incremental_immediates_succeeds() { + let (device, _queue, pipeline, bind_group) = setup_compute(); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.set_immediates(0, &[0u8; 8]); + pass.set_immediates(8, &[0u8; 8]); + pass.dispatch_workgroups(1, 1, 1); + } + wgpu_test::valid(&device, || encoder.finish()); +} + +const STRUCT_SHADER: &str = " + struct S { + a: f32, + // 12 bytes padding + b: vec4, + } + var im: S; + + @group(0) @binding(0) + var output: vec4; + + @compute @workgroup_size(1) + fn main() { + output = im.b; + } +"; + +#[test] +fn struct_padding_slots_not_required() { + let (device, _q) = wgpu::Device::noop(&wgpu::DeviceDescriptor { + required_features: wgpu::Features::IMMEDIATES, + required_limits: wgpu::Limits { + max_immediate_size: 64, + ..Default::default() + }, + ..Default::default() + }); + + let sm = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(STRUCT_SHADER.into()), + }); + + let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[Some(&bgl)], + immediate_size: 32, + }); + + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&layout), + module: &sm, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 16, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: buffer.as_entire_binding(), + }], + }); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + // skip padding at bytes 4..16 + pass.set_immediates(0, &[0u8; 4]); + pass.set_immediates(16, &[0u8; 16]); + pass.dispatch_workgroups(1, 1, 1); + } + wgpu_test::valid(&device, || encoder.finish()); +} + +const NO_IMMEDIATES_SHADER: &str = " + @group(0) @binding(0) + var output: u32; + + @compute @workgroup_size(1) + fn main() { + output = 42u; + } +"; + +#[test] +fn pipeline_without_immediates_needs_none() { + let (device, _queue) = wgpu::Device::noop(&wgpu::DeviceDescriptor::default()); + + let sm = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(NO_IMMEDIATES_SHADER.into()), + }); + + let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: None, + }, + count: None, + }], + }); + + let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[Some(&bgl)], + immediate_size: 0, + }); + + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: Some(&layout), + module: &sm, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 4, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: buffer.as_entire_binding(), + }], + }); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.dispatch_workgroups(1, 1, 1); + } + wgpu_test::valid(&device, || encoder.finish()); +} + +#[test] +fn auto_layout_infers_immediate_size() { + let (device, _q) = wgpu::Device::noop(&wgpu::DeviceDescriptor { + required_features: wgpu::Features::IMMEDIATES, + required_limits: wgpu::Limits { + max_immediate_size: 64, + ..Default::default() + }, + ..Default::default() + }); + + let sm = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: None, + source: wgpu::ShaderSource::Wgsl(COMPUTE_SHADER.into()), + }); + + let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: None, + layout: None, + module: &sm, + entry_point: Some("main"), + compilation_options: Default::default(), + cache: None, + }); + + let buffer = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 16, + usage: wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &pipeline.get_bind_group_layout(0), + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: buffer.as_entire_binding(), + }], + }); + + let mut encoder = device.create_command_encoder(&Default::default()); + { + let mut pass = encoder.begin_compute_pass(&Default::default()); + pass.set_pipeline(&pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.set_immediates(0, &[0u8; 16]); + pass.dispatch_workgroups(1, 1, 1); + } + wgpu_test::valid(&device, || encoder.finish()); +} diff --git a/tests/tests/wgpu-validation/api/mod.rs b/tests/tests/wgpu-validation/api/mod.rs index ebb050a8c26..b7919d8d16d 100644 --- a/tests/tests/wgpu-validation/api/mod.rs +++ b/tests/tests/wgpu-validation/api/mod.rs @@ -8,6 +8,7 @@ mod encoding; mod error_scopes; mod experimental; mod external_texture; +mod immediates; mod instance; mod render_pipeline; mod texture; diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 00bb942714c..9472a3b5a69 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -286,6 +286,7 @@ impl RenderBundleEncoder { texture_memory_init_actions: Vec::new(), next_dynamic_offset: 0, binder: Binder::new(), + immediate_slots_set: 0, }; let indices = &state.device.tracker_indices; @@ -596,11 +597,6 @@ fn set_pipeline( .commands .push(ArcRenderCommand::SetPipeline(pipeline.clone())); - // If this pipeline uses immediates, zero out their values. - if let Some(cmd) = pipeline_state.zero_immediates() { - state.commands.push(cmd); - } - state.pipeline = Some(pipeline_state); state @@ -712,6 +708,7 @@ fn set_immediates( size_bytes, values_offset, }); + state.immediate_slots_set |= crate::immediates::slots_for_range(offset, size_bytes); Ok(()) } @@ -1279,9 +1276,6 @@ struct PipelineState { /// How this pipeline's vertex shader traverses each vertex buffer, indexed /// by vertex buffer slot number. steps: Vec, - - /// Size of the immediate data ranges this pipeline uses. Copied from the pipeline layout. - immediate_size: u32, } impl PipelineState { @@ -1289,22 +1283,7 @@ impl PipelineState { Self { pipeline: pipeline.clone(), steps: pipeline.vertex_steps.to_vec(), - immediate_size: pipeline.layout.immediate_size, - } - } - - /// Return a sequence of commands to zero the immediate data ranges this - /// pipeline uses. If no initialization is necessary, return `None`. - fn zero_immediates(&self) -> Option { - if self.immediate_size == 0 { - return None; } - - Some(ArcRenderCommand::SetImmediate { - offset: 0, - size_bytes: self.immediate_size, - values_offset: None, - }) } } @@ -1346,6 +1325,7 @@ struct State { texture_memory_init_actions: Vec, next_dynamic_offset: usize, binder: Binder, + immediate_slots_set: u16, } impl State { @@ -1424,6 +1404,14 @@ impl State { } } + let required = pipeline.pipeline.immediate_slots_required; + if required & !self.immediate_slots_set != 0 { + return Err(DrawError::MissingImmediateData { + required, + set: self.immediate_slots_set, + }); + } + Ok(()) } else { Err(DrawError::MissingPipeline(pass::MissingPipeline)) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 7ae15dd5356..331f0a26985 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -129,6 +129,8 @@ pub enum DispatchError { InvalidGroupSize { current: [u32; 3], limit: u32 }, #[error(transparent)] BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch), + #[error("Not all immediate data slots required by the pipeline have been set (required: 0x{required:04X}, set: 0x{set:04X})")] + MissingImmediateData { required: u16, set: u16 }, } impl WebGpuError for DispatchError { @@ -262,6 +264,8 @@ struct State<'scope, 'snatch_guard, 'cmd_enc> { immediates: Vec, + immediate_slots_set: u16, + intermediate_trackers: Tracker, } @@ -270,6 +274,13 @@ impl<'scope, 'snatch_guard, 'cmd_enc> State<'scope, 'snatch_guard, 'cmd_enc> { if let Some(pipeline) = self.pipeline.as_ref() { self.pass.binder.check_compatibility(pipeline.as_ref())?; self.pass.binder.check_late_buffer_bindings()?; + let required = pipeline.immediate_slots_required; + if required & !self.immediate_slots_set != 0 { + return Err(DispatchError::MissingImmediateData { + required, + set: self.immediate_slots_set, + }); + } Ok(()) } else { Err(DispatchError::MissingPipeline(pass::MissingPipeline)) @@ -558,6 +569,8 @@ pub(super) fn encode_compute_pass( immediates: Vec::new(), + immediate_slots_set: 0, + intermediate_trackers: Tracker::new( device.ordered_buffer_usages, device.ordered_texture_usages, @@ -674,6 +687,7 @@ pub(super) fn encode_compute_pass( }, ) .map_pass_err(scope)?; + state.immediate_slots_set |= crate::immediates::slots_for_range(offset, size_bytes); } ArcComputeCommand::Dispatch(groups) => { let scope = PassErrorScope::Dispatch { indirect: false }; diff --git a/wgpu-core/src/command/draw.rs b/wgpu-core/src/command/draw.rs index 642ba95c605..57778dc606a 100644 --- a/wgpu-core/src/command/draw.rs +++ b/wgpu-core/src/command/draw.rs @@ -75,6 +75,8 @@ pub enum DrawError { highest_view_index: u32, max_multiviews: u32, }, + #[error("Not all immediate data slots required by the pipeline have been set (required: 0x{required:04X}, set: 0x{set:04X})")] + MissingImmediateData { required: u16, set: u16 }, } impl WebGpuError for DrawError { diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 36e70bb919f..2c01238fc0a 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -528,6 +528,8 @@ struct State<'scope, 'snatch_guard, 'cmd_enc> { pass: pass::PassState<'scope, 'snatch_guard, 'cmd_enc>, + immediate_slots_set: u16, + active_occlusion_query: Option<(Arc, u32)>, active_pipeline_statistics_query: Option<(Arc, u32)>, } @@ -580,6 +582,13 @@ impl<'scope, 'snatch_guard, 'cmd_enc> State<'scope, 'snatch_guard, 'cmd_enc> { wanted_mesh_pipeline: !pipeline.is_mesh, }); } + let required = pipeline.immediate_slots_required; + if required & !self.immediate_slots_set != 0 { + return Err(DrawError::MissingImmediateData { + required, + set: self.immediate_slots_set, + }); + } Ok(()) } else { Err(DrawError::MissingPipeline(pass::MissingPipeline)) @@ -601,6 +610,7 @@ impl<'scope, 'snatch_guard, 'cmd_enc> State<'scope, 'snatch_guard, 'cmd_enc> { self.pipeline = None; self.index.reset(); self.vertex = Default::default(); + self.immediate_slots_set = 0; } } @@ -1957,6 +1967,8 @@ pub(super) fn encode_render_pass( string_offset: 0, }, + immediate_slots_set: 0, + active_occlusion_query: None, active_pipeline_statistics_query: None, }; @@ -2033,6 +2045,8 @@ pub(super) fn encode_render_pass( |_| {}, ) .map_pass_err(scope)?; + state.immediate_slots_set |= + crate::immediates::slots_for_range(offset, size_bytes); } ArcRenderCommand::SetScissor(rect) => { let scope = PassErrorScope::SetScissorRect; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 73043f2e458..406c1591d10 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -3779,6 +3779,7 @@ impl Device { fn create_derived_pipeline_layout( self: &Arc, mut derived_group_layouts: Box>, + immediate_size: u32, ) -> Result, pipeline::ImplicitLayoutError> { while derived_group_layouts .last() @@ -3819,7 +3820,7 @@ impl Device { let layout_desc = binding_model::ResolvedPipelineLayoutDescriptor { label: None, bind_group_layouts: Cow::Owned(bind_group_layouts), - immediate_size: 0, //TODO? + immediate_size, }; let layout = self.create_pipeline_layout_impl(&layout_desc, true)?; @@ -3880,7 +3881,11 @@ impl Device { let pipeline_layout = match binding_layout_source { validation::BindingLayoutSource::Provided(pipeline_layout) => pipeline_layout, validation::BindingLayoutSource::Derived(entries) => { - self.create_derived_pipeline_layout(entries)? + let immediate_size = shader_module + .interface + .as_ref() + .map_or(0, |i| i.immediate_size); + self.create_derived_pipeline_layout(entries, immediate_size)? } }; @@ -3927,12 +3932,18 @@ impl Device { }, )?; + let immediate_slots_required = shader_module + .interface + .as_ref() + .map_or(0, |iface| iface.immediate_slots_required); + let pipeline = pipeline::ComputePipeline { raw: ManuallyDrop::new(raw), layout: pipeline_layout, device: self.clone(), _shader_module: shader_module, late_sized_buffer_groups, + immediate_slots_required, label: desc.label.to_string(), tracking_data: TrackingData::new(self.tracker_indices.compute_pipelines.clone()), }; @@ -4607,7 +4618,26 @@ impl Device { let pipeline_layout = match binding_layout_source { validation::BindingLayoutSource::Provided(pipeline_layout) => pipeline_layout, validation::BindingLayoutSource::Derived(entries) => { - self.create_derived_pipeline_layout(entries)? + let immediate_size = { + let immediate_size_of = |sm: &pipeline::ShaderModule| { + sm.interface.as_ref().map(|i| i.immediate_size) + }; + let vertex = match desc.vertex { + pipeline::RenderPipelineVertexProcessor::Vertex(ref v) => { + immediate_size_of(&v.stage.module) + } + pipeline::RenderPipelineVertexProcessor::Mesh(ref task, ref mesh) => task + .as_ref() + .and_then(|t| immediate_size_of(&t.stage.module)) + .max(immediate_size_of(&mesh.stage.module)), + }; + let fragment = desc + .fragment + .as_ref() + .and_then(|f| immediate_size_of(&f.stage.module)); + vertex.max(fragment).unwrap_or(0) + }; + self.create_derived_pipeline_layout(entries, immediate_size)? } }; @@ -4739,6 +4769,12 @@ impl Device { shader_modules }; + let immediate_slots_required = shader_modules + .iter() + .filter_map(|sm| sm.interface.as_ref()) + .map(|i| i.immediate_slots_required) + .fold(0u16, core::ops::BitOr::bitor); + let pipeline = pipeline::RenderPipeline { raw: ManuallyDrop::new(raw), layout: pipeline_layout, @@ -4750,6 +4786,7 @@ impl Device { strip_index_format: desc.primitive.strip_index_format, vertex_steps, late_sized_buffer_groups, + immediate_slots_required, label: desc.label.to_string(), tracking_data: TrackingData::new(self.tracker_indices.render_pipelines.clone()), is_mesh, diff --git a/wgpu-core/src/immediates.rs b/wgpu-core/src/immediates.rs new file mode 100644 index 00000000000..bc5e60c81d2 --- /dev/null +++ b/wgpu-core/src/immediates.rs @@ -0,0 +1,90 @@ +/// Returns the bitmask of slots covered by a `set_immediates(offset, size_bytes)` call. +pub(crate) fn slots_for_range(offset: u32, size_bytes: u32) -> u16 { + // u32 upcast to avoid overflow panic on n = 16 + let bits_below = |n: u32| ((1u32 << n.min(16)) - 1) as u16; + let lo = offset / 4; + let hi = (offset + size_bytes).div_ceil(4); + bits_below(hi) - bits_below(lo) +} + +/// Computes a bitmask of which u32 immediate slots must be set before draw/dispatch. +/// Bit N is set if the u32 at byte N*4 must be written by `set_immediates`. +/// +/// For structs, gaps between members are padding and those slots need not be set. +/// For scalars, vectors, and matrices, all slots in the span are required +/// (the spec only defines padding exemptions at the struct-member level). +pub(crate) fn slots_for_type(ty: &naga::TypeInner, gctx: naga::proc::GlobalCtx) -> u16 { + match *ty { + naga::TypeInner::Struct { ref members, .. } => { + let mut mask: u16 = 0; + for member in members { + let member_size = gctx.types[member.ty].inner.size(gctx); + mask |= slots_for_range(member.offset, member_size); + } + mask + } + _ => { + let size = ty.size(gctx); + slots_for_range(0, size) + } + } +} + +/// Returns the `var` type from a naga module, if any. +fn immediate_type(module: &naga::Module) -> Option<&naga::TypeInner> { + module + .global_variables + .iter() + .find(|(_, var)| var.space == naga::AddressSpace::Immediate) + .map(|(_, var)| &module.types[var.ty].inner) +} + +/// Returns the required immediate slot bitmask for a naga module. +/// Zero if the module has no `var`. +pub(crate) fn slots_for_module(module: &naga::Module) -> u16 { + immediate_type(module).map_or(0, |ty| slots_for_type(ty, module.to_ctx())) +} + +/// Returns the byte size of the `var` type in a naga module. +/// Zero if the module has no `var`. +pub(crate) fn size_for_module(module: &naga::Module) -> u32 { + immediate_type(module).map_or(0, |ty| ty.size(module.to_ctx())) +} + +#[cfg(test)] +#[cfg(feature = "wgsl")] +mod tests { + use super::slots_for_module; + + fn immediate_slots(wgsl: &str) -> u16 { + slots_for_module(&naga::front::wgsl::parse_str(wgsl).unwrap()) + } + + #[test] + fn non_struct() { + assert_eq!(immediate_slots("var im: vec4;"), 0b1111); + assert_eq!(immediate_slots("var im: mat4x4;"), 0xFFFF); + } + + #[test] + fn struct_with_padding() { + assert_eq!( + immediate_slots( + "struct S { a: f32, b: vec4 } + var im: S;" + ), + 0b1111_0001, + ); + } + + #[test] + fn struct_no_padding() { + assert_eq!( + immediate_slots( + "struct S { a: f32, b: f32 } + var im: S;" + ), + 0b11, + ); + } +} diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index cc01e0840ee..c6cc0c03cc1 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -76,6 +76,7 @@ mod hash_utils; pub mod hub; pub mod id; pub mod identity; +mod immediates; mod indirect_validation; mod init_tracker; pub mod instance; diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index a9d3ad0a8e2..e0b8059e350 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -278,6 +278,7 @@ pub struct ComputePipeline { pub(crate) device: Arc, pub(crate) _shader_module: Arc, pub(crate) late_sized_buffer_groups: ArrayVec, + pub(crate) immediate_slots_required: u16, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, @@ -815,6 +816,7 @@ pub struct RenderPipeline { pub(crate) strip_index_format: Option, pub(crate) vertex_steps: Vec, pub(crate) late_sized_buffer_groups: ArrayVec, + pub(crate) immediate_slots_required: u16, /// The `label` from the descriptor used to create the resource. pub(crate) label: String, pub(crate) tracking_data: TrackingData, diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index d32804d409d..41456e2f95d 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -299,6 +299,8 @@ pub struct Interface { limits: wgt::Limits, resources: naga::Arena, entry_points: FastHashMap<(naga::ShaderStage, String), EntryPoint>, + pub(crate) immediate_slots_required: u16, + pub(crate) immediate_size: u32, } #[derive(Clone, Debug, Error)] @@ -1236,6 +1238,9 @@ impl Interface { resource_mapping.insert(var_handle, handle); } + let immediate_slots_required = crate::immediates::slots_for_module(module); + let immediate_size = crate::immediates::size_for_module(module); + let mut entry_points = FastHashMap::default(); entry_points.reserve(module.entry_points.len()); for (index, entry_point) in module.entry_points.iter().enumerate() { @@ -1300,6 +1305,8 @@ impl Interface { limits, resources, entry_points, + immediate_slots_required, + immediate_size, } }