@@ -32,28 +32,25 @@ int unpack(int x) {
3232
3333shared vec4 sm[4 ][4 ];
3434
35- void main(void ) {
35+ void mip1(ivec2 i, inout vec4 t) {
36+ // compute mip 1 using linear filtering: one textureLod() fetch from baseImage, written to t and stored to mips[0] at i
37+ /*
38+ * We just use a sampler with linear filter and
39+ * sample exactly between four texels.
40+ */
3641 ivec2 ts = textureSize(baseImage, 0 );
37-
3842 // the actual size of our work items is only half the baseImage size, because for the first mip level
3943 // each work item already uses linear filtering with a sampler to gather a 2x2 texel average
4044 ivec2 s = ts / ivec2 (2 );
41-
42- // Compute the (x, y) coordinates of the current work item within its workgroup using z-order curve
43- ivec2 l = ivec2 (unpack(int (gl_LocalInvocationID.x)),
44- unpack(int (gl_LocalInvocationID.x >> 1u)));
45-
46- // Compute the global (x, y) coordinate of this work item
47- ivec2 i = ivec2 (gl_WorkGroupID.xy) * ivec2 (16 ) + l;
48-
49- // compute mip 1 using linear filtering
5045 if (i.x >= s.x || i.y >= s.y) // work item lies outside the half-size grid s
5146 return ; // NOTE(review): this now only leaves mip1(); main() still calls mip2()..mip5() afterwards with t unchanged, whereas the removed main() body exited the whole shader here - confirm this is intended
5247 // Compute a texture coordinate right at the corner between four texels
5348 vec2 tc = (vec2 (i * 2 ) + vec2 (1.0 )) / vec2 (ts);
54- vec4 t = textureLod(baseImage, tc, 0.0 );
49+ t = textureLod(baseImage, tc, 0.0 ); // result is handed back to the caller through the inout parameter
5550 imageStore(mips[0 ], i, t);
51+ }
5652
53+ void mip2(ivec2 i, inout vec4 t) {
5754 // compute mip 2 using subgroup quad sharing
5855 /*
5956 * The trick here is to assume a 1:1 correspondence between subgroup invocation ids
@@ -68,19 +65,23 @@ void main(void) {
6865 t = (t + h + v + d) * vec4 (0.25 );
6966 if ((gl_SubgroupInvocationID & 3 ) == 0 )
7067 imageStore(mips[1 ], i/ ivec2 (2 ), t);
68+ }
7169
70+ void mip3(ivec2 i, inout vec4 t) {
7271 // compute mip 3 using subgroup xor shuffles; t is read as this invocation's input and overwritten with the averaged result
7372 /*
7473 * The trick here is to exchange information between subgroup items with a stride
7574 * of 4 items. In order to do this, we have subgroupShuffleXor().
7675 */
77- h = subgroupShuffleXor(t, 4 );
78- v = subgroupShuffleXor(t, 8 );
79- d = subgroupShuffleXor(t, 12 );
76+ vec4 h = subgroupShuffleXor(t, 4 ); // t of the invocation whose subgroup id is (own id ^ 4)
77+ vec4 v = subgroupShuffleXor(t, 8 ); // t of the invocation whose subgroup id is (own id ^ 8)
78+ vec4 d = subgroupShuffleXor(t, 12 ); // t of the invocation whose subgroup id is (own id ^ 12)
8079 t = (t + h + v + d) * vec4 (0.25 ); // average of the four values
8180 if ((gl_SubgroupInvocationID & 15 ) == 0 ) // only invocations whose low four id bits are zero store
8281 imageStore(mips[2 ], i/ ivec2 (4 ), t);
82+ }
8383
84+ void mip4(ivec2 l, ivec2 i, inout vec4 t) {
8485 // compute mip 4 using shared memory
8586 /*
8687 * For mip 4 we essentially have 8x8 work items.
@@ -94,12 +95,14 @@ void main(void) {
9495 t = (sm[smc.x][smc.y] + sm[smi.x][smc.y] + sm[smc.x][smi.y] + sm[smi.x][smi.y]) * 0.25 ;
9596 imageStore(mips[3 ], i/ ivec2 (8 ), t);
9697 }
98+ }
9799
100+ void mip5(ivec2 l, ivec2 i, vec4 t) {
98101 // compute mip 5 also using shared memory
99102 /*
100103 * For mip 5 we have 16x16 work items.
101104 */
102- smc = l / ivec2 (8 );
105+ ivec2 smc = l / ivec2 (8 );
103106 if ((l.x & 7 ) == 0 && (l.y & 7 ) == 0 )
104107 sm[smc.x][smc.y] = t;
105108 barrier();
@@ -108,3 +111,19 @@ void main(void) {
108111 imageStore(mips[4 ], i/ ivec2 (16 ), t);
109112 }
110113}
114+
115+ void main(void ) {
116+ // Compute the (x, y) coordinates of the current work item within its workgroup using z-order curve
117+ ivec2 l = ivec2 (unpack(int (gl_LocalInvocationID.x)),
118+ unpack(int (gl_LocalInvocationID.x >> 1u)));
119+
120+ // Compute the global (x, y) coordinate of this work item
121+ ivec2 i = ivec2 (gl_WorkGroupID.xy) * ivec2 (16 ) + l;
122+
123+ vec4 t = vec4 (0.0 ); // value carried from one mip stage to the next; stays zero if mip1() returns early
124+ mip1(i, t); // t is inout for mip1..mip4, so each stage sees the previous stage's result
125+ mip2(i, t);
126+ mip3(i, t);
127+ mip4(l, i, t); // mip4 and mip5 additionally take the workgroup-local coordinate l
128+ mip5(l, i, t); // mip5 takes t by value; nothing reads it afterwards
129+ }
0 commit comments