@@ -117,10 +117,10 @@ fn spd_downsample_mips_0_1(x: u32, y: u32, workgroup_id: vec2u, local_invocation
117
117
if mips <= 1u { return ; }
118
118
119
119
#ifdef SUBGROUP_SUPPORT
120
- v [0 ] = spd_reduce_quad (v [0 ], subgroup_invocation_id );
121
- v [1 ] = spd_reduce_quad (v [1 ], subgroup_invocation_id );
122
- v [2 ] = spd_reduce_quad (v [2 ], subgroup_invocation_id );
123
- v [3 ] = spd_reduce_quad (v [3 ], subgroup_invocation_id );
120
+ v [0 ] = spd_reduce_quad (v [0 ]);
121
+ v [1 ] = spd_reduce_quad (v [1 ]);
122
+ v [2 ] = spd_reduce_quad (v [2 ]);
123
+ v [3 ] = spd_reduce_quad (v [3 ]);
124
124
125
125
if local_invocation_index % 4u == 0u {
126
126
spd_store ((workgroup_id * 16u ) + vec2 (x / 2u , y / 2u ), v [0 ], 1u , slice );
@@ -181,7 +181,7 @@ fn spd_downsample_next_four(x: u32, y: u32, workgroup_id: vec2u, local_invocatio
181
181
fn spd_downsample_mip_2 (x : u32 , y : u32 , workgroup_id : vec2u , local_invocation_index : u32 , base_mip : u32 , slice : u32 , subgroup_invocation_id : u32 ) {
182
182
#ifdef SUBGROUP_SUPPORT
183
183
var v = spd_load_intermediate (x , y );
184
- v = spd_reduce_quad (v , subgroup_invocation_id );
184
+ v = spd_reduce_quad (v );
185
185
if local_invocation_index % 4u == 0u {
186
186
spd_store ((workgroup_id * 8u ) + vec2 (x / 2u , y / 2u ), v , base_mip , slice );
187
187
spd_store_intermediate (x + (y / 2u ) % 2u , y , v );
@@ -204,7 +204,7 @@ fn spd_downsample_mip_3(x: u32, y: u32, workgroup_id: vec2u, local_invocation_in
204
204
#ifdef SUBGROUP_SUPPORT
205
205
if local_invocation_index < 64u {
206
206
var v = spd_load_intermediate (x * 2u + y % 2u , y * 2u );
207
- v = spd_reduce_quad (v , subgroup_invocation_id );
207
+ v = spd_reduce_quad (v );
208
208
if local_invocation_index % 4u == 0u {
209
209
spd_store ((workgroup_id * 4u ) + vec2 (x / 2u , y / 2u ), v , base_mip , slice );
210
210
spd_store_intermediate (x * 2u + y / 2u , y * 2u , v );
@@ -228,7 +228,7 @@ fn spd_downsample_mip_4(x: u32, y: u32, workgroup_id: vec2u, local_invocation_in
228
228
#ifdef SUBGROUP_SUPPORT
229
229
if local_invocation_index < 16u {
230
230
var v = spd_load_intermediate (x * 4u + y , y * 4u );
231
- v = spd_reduce_quad (v , subgroup_invocation_id );
231
+ v = spd_reduce_quad (v );
232
232
if local_invocation_index % 4u == 0u {
233
233
spd_store ((workgroup_id * 2u ) + vec2 (x / 2u , y / 2u ), v , base_mip , slice );
234
234
spd_store_intermediate (x / 2u + y , 0u , v );
@@ -252,7 +252,7 @@ fn spd_downsample_mip_5(x: u32, y: u32, workgroup_id: vec2u, local_invocation_in
252
252
#ifdef SUBGROUP_SUPPORT
253
253
if local_invocation_index < 4u {
254
254
var v = spd_load_intermediate (local_invocation_index , 0u );
255
- v = spd_reduce_quad (v , subgroup_invocation_id );
255
+ v = spd_reduce_quad (v );
256
256
if local_invocation_index % 4u == 0u {
257
257
spd_store (workgroup_id , v , base_mip , slice );
258
258
}
@@ -436,20 +436,12 @@ fn spd_reduce_4(v0: vec4f, v1: vec4f, v2: vec4f, v3: vec4f) -> vec4f {
436
436
}
437
437
438
438
#ifdef SUBGROUP_SUPPORT
439
// spd_reduce_quad (compiled only under SUBGROUP_SUPPORT): combines this invocation's
// value with three values fetched from other invocations.
//   v       - this invocation's vec4f value.
//   returns - spd_reduce_4(v0, v1, v2, v3), where v0 is `v` itself.
// This hunk changes how v1..v3 are fetched:
//   * removed version: took an extra `subgroup_invocation_id` parameter, computed
//     `quad = subgroup_invocation_id & (~0x3u)` (the id with its low two bits cleared)
//     and read lanes quad|1u, quad|2u, quad|3u via subgroupBroadcast.
//   * added version: drops that parameter and calls quadSwapX / quadSwapY /
//     quadSwapDiagonal on `v`, which is the form the removed TODO comment asked for.
// NOTE(review): presumably the quad swaps return the value held by the horizontal,
// vertical and diagonal neighbour within a 2x2 quad - confirm against the WGSL
// subgroups specification for the toolchain version in use.
- fn spd_reduce_quad (v : vec4f , subgroup_invocation_id : u32 ) -> vec4f {
440
- let quad = subgroup_invocation_id & (~0x3u );
439
+ fn spd_reduce_quad (v : vec4f ) -> vec4f {
441
440
let v0 = v ;
442
- let v1 = subgroupBroadcast ( v , quad | 1u );
443
- let v2 = subgroupBroadcast ( v , quad | 2u );
444
- let v3 = subgroupBroadcast ( v , quad | 3u );
441
+ let v1 = quadSwapX ( v );
442
+ let v2 = quadSwapY ( v );
443
+ let v3 = quadSwapDiagonal ( v );
445
444
return spd_reduce_4 (v0 , v1 , v2 , v3 );
446
-
447
- // TODO: Use subgroup quad operations once wgpu supports them
448
- // let v0 = v;
449
- // let v1 = quadSwapX(v);
450
- // let v2 = quadSwapY(v);
451
- // let v3 = quadSwapDiagonal(v);
452
- // return spd_reduce_4(v0, v1, v2, v3);
453
445
}
454
446
#endif
455
447
0 commit comments