1+ #pragma kernel BigTileLightListGen
2+
3+ #include "../TilePass.cs.hlsl"
4+ #include "../LightingConvexHullUtils.hlsl"
5+ #if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
6+ #include "../SortingComputeUtils.hlsl"
7+ #endif
8+
9+ #define EXACT_EDGE_TESTS
10+ #define PERFORM_SPHERICAL_INTERSECTION_TESTS
11+
12+ #define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIGTILE_LIGHTS_PLUSONE-1)
13+
14+
// ---------------------------------------------------------------------------
// Constants supplied per dispatch
// ---------------------------------------------------------------------------
uniform int      g_iNrVisibLights;      // number of lights iterated by the coarse pass; also the offset to the max-bounds half of g_vBoundsBuffer
uniform uint2    g_viDimensions;        // .x is read as width and .y as height when the 64x64 big-tile grid is derived
uniform float4x4 g_mInvScrProjection;   // used by GetLinearDepth to transform a depth-buffer value
uniform float4x4 g_mScrProjection;      // rows 0 and 1 provide the x/y scale ([0].x, [1].y) and offset ([0].z, [1].z)
uniform float    g_fNearPlane;          // depth used for the near vertices of a tile frustum
uniform float    g_fFarPlane;           // depth used for the far vertices of a tile frustum

// ---------------------------------------------------------------------------
// Read-only inputs
// ---------------------------------------------------------------------------
StructuredBuffer<float3>            g_vBoundsBuffer : register(t1);   // entry [l] is read as the min bound and [l + g_iNrVisibLights] as the max bound of light l
StructuredBuffer<SFiniteLightData>  g_vLightData    : register(t2);   // per-light data (lightType is read by the exact edge tests)
StructuredBuffer<SFiniteLightBound> g_data          : register(t3);   // per-light bounding volume (center, radius, box axes, scaleXY)


// number of threads in one thread group (one group processes one big tile)
#define NR_THREADS          64

// ---------------------------------------------------------------------------
// Output
// ---------------------------------------------------------------------------
// Per big tile: MAX_NR_BIGTILE_LIGHTS_PLUSONE entries, entry 0 is the light count and is followed by the light indices.
RWBuffer<uint> g_vLightList : register(u0);


// ---------------------------------------------------------------------------
// Group-shared scratch
// ---------------------------------------------------------------------------
// 2kB (room for roughly 30 wavefronts)
groupshared uint lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE];   // light list of the tile being built
groupshared uint lightOffs;                                      // shared counter advanced with InterlockedAdd
36+
37+
// Maps a depth-buffer value (0 is near, 1 is far) to a depth along the view axis.
// The point (0, 0, zDptBufSpace, 1) is transformed by g_mInvScrProjection and the
// resulting z is divided by the resulting w.
float GetLinearDepth(float zDptBufSpace)
{
    const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
    return unprojected.z / unprojected.w;
}
44+
45+
// Reconstructs a view-space position from a screen position and a linear depth.
//   v2ScrPos  : screen-space position (same units as the offsets stored in g_mScrProjection)
//   fLinDepth : depth the resulting position is scaled by (its z component equals fLinDepth)
// The x/y scale is read from g_mScrProjection[0].x / [1].y and the x/y offset from
// g_mScrProjection[0].z / [1].z. The sign convention depends on USE_LEFTHAND_CAMERASPACE.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    const float2 scaleXY  = float2(g_mScrProjection[0].x, g_mScrProjection[1].y);
    const float2 offsetXY = float2(g_mScrProjection[0].z, g_mScrProjection[1].z);

#if USE_LEFTHAND_CAMERASPACE
    const float2 dirXY = (v2ScrPos - offsetXY) / scaleXY;
#else
    const float2 dirXY = -((v2ScrPos + offsetXY) / scaleXY);
#endif

    return fLinDepth * float3(dirXY, 1.0);
}
59+
// Returns the length of the vector (1/Sx, 1/Sy), where Sx and Sy are the x and y
// scale terms of g_mScrProjection. As the name states, this is the diagonal extent
// of one pixel at a depth of one.
float GetOnePixDiagWorldDistAtDepthOne()
{
    const float2 invScale = float2(1.0/g_mScrProjection[0].x, 1.0/g_mScrProjection[1].y);
    return length(invScale);
}
67+
68+
// Forward declarations of the optional culling passes; the definitions follow the kernel.

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
// Invalidates coarse-list entries whose bounding sphere does not overlap the tile.
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif

#ifdef EXACT_EDGE_TESTS
// Invalidates coarse-list entries for which an edge/edge separating plane is found.
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR);
#endif
76+
77+
78+
79+
// Kernel entry point: builds the light list of one 64x64 big tile per thread group.
//   threadID  : index of the thread inside the group (0 .. NR_THREADS-1)
//   u3GroupID : group id; .xy is the big-tile coordinate
//
// Steps:
//   1. coarse list from the per-light screen-space AABBs in g_vBoundsBuffer
//   2. optional sphere and exact edge culling (rejected entries become 0xffffffff)
//   3. sort, so that rejected entries (largest value) end up last, then count the valid ones
//   4. write [count, light indices...] to this tile's segment of g_vLightList
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint2 tileIDX = u3GroupID.xy;
    uint t=threadID;

    uint iWidth = g_viDimensions.x;
    uint iHeight = g_viDimensions.y;
    uint nrBigTilesX = (iWidth+63)/64;

    // reset the shared counter before any thread appends to the list
    if(t==0) lightOffs = 0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif


    uint2 viTilLL = 64*tileIDX;
    uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) );       // not width and height minus 1 since viTilUR represents the end of the tile corner.

    // tile rectangle normalized by width/height, for comparison against the light bounds
    float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
    float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

    // build coarse list using AABB
    for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
    {
        const float2 vMi = g_vBoundsBuffer[l].xy;
        const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;

        if( all(vMa>vTileLL) && all(vMi<vTileUR))
        {
            unsigned int uInc = 1;
            unsigned int uIndex;
            InterlockedAdd(lightOffs, uInc, uIndex);
            if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l;     // add to light list
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // the counter may exceed the capacity; only the first MAX_NR_BIGTILE_LIGHTS entries were stored
    int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
    SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
#endif

#ifdef EXACT_EDGE_TESTS
    CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
#endif


    // sort lights
    // NOTE(review): SortingComputeUtils.hlsl is only included when neither SHADER_API_XBOXONE nor
    // SHADER_API_PSSL is defined, yet SORTLIST is used unconditionally -- confirm it is available there.
    SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIGTILE_LIGHTS_PLUSONE, t, NR_THREADS);

    // count the entries that survived culling (rejected entries hold 0xffffffff)
    lightOffs = 0;
    GroupMemoryBarrierWithGroupSync();
    for(int iEntry=(int) t; iEntry<iNrCoarseLights; iEntry+=NR_THREADS)
    {
        if(lightsListLDS[iEntry]<(uint) g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
    }
    GroupMemoryBarrierWithGroupSync();
    iNrCoarseLights = lightOffs;

    int offs = tileIDX.y*nrBigTilesX + tileIDX.x;

    // Slot 0 of the tile's segment receives the light count, slot s>0 receives light s-1.
    // The selection must be made on the slot index and not on the thread index: thread 0
    // also handles slots NR_THREADS, 2*NR_THREADS, ... and those must hold light indices.
    for(int iSlot=(int) t; iSlot<(iNrCoarseLights+1); iSlot+=NR_THREADS)
    {
        uint uValue = (uint) iNrCoarseLights;
        if(iSlot>0) uValue = lightsListLDS[iSlot-1];

        g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + iSlot] = uValue;
    }
}
148+
149+
150+ #ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
// Culls coarse-list entries by testing each light's bounding sphere against the tile.
//   threadID         : index of the calling thread inside the group
//   iNrCoarseLights  : number of entries in lightsListLDS to examine
//   screenCoordinate : screen position the tile is represented by (the caller passes the tile center)
// Entries that fail DoesSphereOverlapTile are overwritten with 0xffffffff.
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
    // view-space position through screenCoordinate at unit distance; the sign follows the handedness
#if USE_LEFTHAND_CAMERASPACE
    const float unitDepth = 1.0;
#else
    const float unitDepth = -1.0;
#endif
    float3 V = GetViewPosFromLinDepth( screenCoordinate, unitDepth);

    // scale by half a tile
    float halfTileSizeAtZDistOne = 32*GetOnePixDiagWorldDistAtDepthOne();

    // each thread handles every NR_THREADS-th entry
    for(int iEntry=threadID; iEntry<iNrCoarseLights; iEntry+=NR_THREADS)
    {
        SFiniteLightBound bound = g_data[lightsListLDS[iEntry]];

        if( !DoesSphereOverlapTile(V, halfTileSizeAtZDistOne, bound.center.xyz, bound.radius) )
        {
            lightsListLDS[iEntry]=0xffffffff;
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
174+ #endif
175+
176+
177+
178+
179+
180+
181+
182+ #ifdef EXACT_EDGE_TESTS
// Returns one of the 8 vertices of a tile frustum as a view-space position.
//   viTilLL, viTilUR : the two corners of the tile rectangle
//   i                : vertex selector; bit 0 picks x (0: viTilLL, 1: viTilUR), bit 1 picks y
//                      the same way and bit 2 picks the depth (0: g_fNearPlane, 1: fTileFarPlane)
//   fTileFarPlane    : depth used for the far vertices
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
    const bool bTakeURx = (i&1)!=0;
    const bool bTakeURy = (i&2)!=0;
    const bool bTakeFar = (i&4)!=0;

    const float2 corner = float2(bTakeURx ? viTilUR.x : viTilLL.x, bTakeURy ? viTilUR.y : viTilLL.y);

    float depth = bTakeFar ? fTileFarPlane : g_fNearPlane;
#if !USE_LEFTHAND_CAMERASPACE
    depth = -depth;
#endif

    return GetViewPosFromLinDepth( corner, depth);
}
193+
// Returns edge e0 of the tile frustum as a point on the edge and an edge vector.
//   vP0           : (out) point on the edge
//   vE0           : (out) edge vector
//   e0            : edge index; bits 0-1 select the corner, the remaining bits select the section
//   viTilLL/UR    : the two corners of the tile rectangle
//   fTileFarPlane : depth used for far vertices
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    const int section = e0>>2;      // section 0 is side edges, section 1 is near edges and section 2 is far edges
    const int corner  = e0&0x3;

    const int vertexIdx = corner + (2*(section&0x2));   // offset by 4 at section 2

    // note that the y components of the two corners are exchanged in this call
    vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), vertexIdx, fTileFarPlane);

    if(section==0)
    {
        // side edge: the edge vector is the vertex position itself
        vE0 = vP0;
    }
    else
    {
        // near/far edge: a signed unit vector along x or y, chosen from the corner bits
        const float3 axis = (corner&0x1)==(corner>>1) ? float3(1,0,0) : float3(0,1,0);
        const float  dir  = (corner&0x2)==0 ? 1.0f : (-1.0f);
        vE0 = dir*axis;
    }
}
203+
// Culls coarse-list entries by searching for a separating plane spanned by one edge of the
// light's hull and one edge of the tile frustum.
//   threadID        : index of the calling thread inside the group
//   iNrCoarseLights : number of entries in lightsListLDS to examine
//   viTilLL/UR      : the two corners of the tile rectangle
// Lights are visited one after another; the edge pairs of a light are distributed over the
// threads. An entry is overwritten with 0xffffffff as soon as one pair yields a separating plane.
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
{
    const bool bOnlyNeedFrustumSideEdges = true;
    const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8;   // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.

    const int totNrEdgePairs = 12*nrFrustEdges;   // 12 hull edges combined with every frustum edge

    for(int l=0; l<iNrCoarseLights; l++)
    {
        const uint lightIdx = lightsListLDS[l];

        // entries already rejected hold 0xffffffff and fail the range check
        // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
        [branch]if(lightIdx<(uint) g_iNrVisibLights && g_vLightData[lightIdx].lightType!=SPHERE_LIGHT)
        {
            SFiniteLightBound bound = g_data[lightIdx];

            const float3 boxX = bound.boxAxisX.xyz;
            const float3 boxY = bound.boxAxisY.xyz;
            const float3 boxZ = -bound.boxAxisZ.xyz;   // flip axis (so it points away from the light direction for a spot-light)
            const float3 center = bound.center.xyz;
            const float2 scaleXY = bound.scaleXY;

            for(int pairIdx=threadID; pairIdx<totNrEdgePairs; pairIdx+=NR_THREADS)
            {
                // split the pair index into a hull edge and a frustum edge
                const int hullEdge  = (int) (((uint)pairIdx)/((uint) nrFrustEdges));   // should become a shift right
                const int frustEdge = pairIdx - hullEdge*nrFrustEdges;

                int idx_cur=0, idx_twin=0;
                float3 hullP, hullE;
                GetHullEdge(idx_cur, idx_twin, hullP, hullE, hullEdge, boxX, boxY, boxZ, center, scaleXY);

                float3 frustP, frustE;
                GetFrustEdge(frustP, frustE, frustEdge, viTilLL, viTilUR, g_fFarPlane);

                // potential separation plane (passes through hullP)
                const float3 planeN = cross(hullE, frustE);

                // classify the hull vertices against the plane
                int numFront=0, numBack=0;
                for(int k=1; k<8; k++)      // only need to test 7 verts (technically just 6).
                {
                    const int v = (idx_cur+k)&0x7;
                    const float3 hullVert = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, v);
                    const float dist = idx_twin==v ? 0.0 : dot(planeN, hullVert-hullP);
                    if(dist>0) ++numFront; else if(dist<0) ++numBack;
                }

                // +1: all in front, -1: all behind, 0: straddling or all on the plane
                int hullSide = 0;
                if(numFront>0 && numBack==0) hullSide = 1;
                else if(numBack>0 && numFront==0) hullSide = -1;

                // classify the 8 frustum vertices against the same plane
                numFront=0; numBack=0;
                for(int c=0; c<8; c++)
                {
                    const float3 frustVert = GetTileVertex(viTilLL, viTilUR, c, g_fFarPlane);
                    const float dist = dot(planeN, frustVert-hullP);
                    if(dist>0) ++numFront; else if(dist<0) ++numBack;
                }

                int frustSide = 0;
                if(numFront>0 && numBack==0) frustSide = 1;
                else if(numBack>0 && numFront==0) frustSide = -1;

                // hull and frustum lie strictly on opposite sides: the plane separates them
                if((hullSide*frustSide)<0)
                {
                    lightsListLDS[l]=0xffffffff;
                }
            }
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
267+ #endif
0 commit comments