Skip to content

Commit eb682a9

Browse files
HDRenderLoop: Merge FTPL Build Light List part (tile, bigtile, cluster..)
- Only builds the light list, not light application, for now - untested; crashes when big tiles are enabled
1 parent f1fe7e7 commit eb682a9

16 files changed

+1279
-278
lines changed

Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.asset.meta

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Assets/ScriptableRenderLoop/HDRenderLoop/HDRenderLoop.cs

Lines changed: 74 additions & 46 deletions
Large diffs are not rendered by default.

Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/LightDefinition.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public struct LightData
5050
public int IESIndex;
5151
public int cookieIndex;
5252

53-
public GPULightType lightType;
53+
public GPULightType lightType;
5454
// Area Light specific
5555
public Vector2 size;
5656
public bool twoSided;

Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/SinglePass/SinglePass.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ public void OnDisable()
7474
s_PunctualShadowList = null;
7575
}
7676

77+
// No-op: this implementation performs no per-frame light preparation.
public void PrepareLightsForGPU(CullResults cullResults, Camera camera, HDRenderLoop.LightList lightList)
{
}
78+
7779
public void PushGlobalParams(Camera camera, RenderLoop loop, HDRenderLoop.LightList lightList)
7880
{
7981
s_DirectionalLights.SetData(lightList.directionalLights.ToArray());
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#ifndef __CLUSTEREDUTILS_H__
2+
#define __CLUSTEREDUTILS_H__
3+
4+
#ifndef FLT_EPSILON
5+
#define FLT_EPSILON 1.192092896e-07f
6+
#endif
7+
8+
// Returns the cluster scale for a given log base: the geometric series
// sum_{k=0}^{C-1} base^k (with C = 1 << g_iLog2NumClusters) divided by the
// distance between the near and far planes.
// NOTE: base == 1 makes the closed-form denominator (1 - base) zero.
float GetScaleFromBase(float base)
{
    const float numClusters = (float)(1 << g_iLog2NumClusters);
    const float depthRange = g_fFarPlane - g_fNearPlane;

    // closed form of the geometric series
    const float seriesSum = (1.0 - pow(base, numClusters)) / (1 - base);

    return seriesSum / depthRange;
}
14+
15+
// Converts a view-space depth into a cluster slice index, clamped to
// [0, (1 << g_iLog2NumClusters) - 1].
//   z_in            view-space z; it is negated unless USE_LEFTHAND_CAMERASPACE is set.
//   suggestedBase   log base of the slice distribution.
//   logBasePerTile  true: derive the scale from suggestedBase; false: use g_fClustScale.
int SnapToClusterIdxFlex(float z_in, float suggestedBase, bool logBasePerTile)
{
#if USE_LEFTHAND_CAMERASPACE
    const float viewDepth = z_in;
#else
    const float viewDepth = -z_in;
#endif

    float scale = g_fClustScale;
    if (logBasePerTile)
    {
        scale = GetScaleFromBase(suggestedBase);
    }

    // Inverse of the geometric series: distance past the near plane -> fractional slice.
    const float distFromNear = max(0, viewDepth - g_fNearPlane);
    const float lastSlice = (float)((1 << g_iLog2NumClusters) - 1);
    const float slice = log2(distFromNear * scale * (suggestedBase - 1.0f) + 1) / log2(suggestedBase);

    return (int)clamp(slice, 0.0, lastSlice);
}
31+
32+
// Depth -> cluster index. Uses the per-tile log base scale when
// ENABLE_DEPTH_TEXTURE_BACKPLANE is defined, otherwise the global g_fClustScale.
// The choice is fixed at compile time.
int SnapToClusterIdx(float z_in, float suggestedBase)
{
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    return SnapToClusterIdxFlex(z_in, suggestedBase, true);
#else
    return SnapToClusterIdxFlex(z_in, suggestedBase, false);
#endif
}
42+
43+
// Converts cluster slice index k back to a view-space z value.
//   suggestedBase   log base of the slice distribution.
//   logBasePerTile  true: derive the scale from suggestedBase; false: use g_fClustScale.
// The result is negated unless USE_LEFTHAND_CAMERASPACE is set.
float ClusterIdxToZFlex(int k, float suggestedBase, bool logBasePerTile)
{
    float scale = g_fClustScale;
    if (logBasePerTile)
    {
        scale = GetScaleFromBase(suggestedBase);
    }

    // Distance past the near plane at which slice k begins.
    const float distFromNear = (pow(suggestedBase, (float)k) - 1.0) / (scale * (suggestedBase - 1.0f));
    const float depth = distFromNear + g_fNearPlane;

#if USE_LEFTHAND_CAMERASPACE
    return depth;
#else
    return -depth;
#endif
}
60+
61+
// Cluster index -> view-space z. Uses the per-tile log base scale when
// ENABLE_DEPTH_TEXTURE_BACKPLANE is defined, otherwise the global g_fClustScale.
// The choice is fixed at compile time.
float ClusterIdxToZ(int k, float suggestedBase)
{
#ifdef ENABLE_DEPTH_TEXTURE_BACKPLANE
    return ClusterIdxToZFlex(k, suggestedBase, true);
#else
    return ClusterIdxToZFlex(k, suggestedBase, false);
#endif
}
71+
72+
// Produces a log base such that half of the clusters are spent between the near
// plane and the tile's maximum opaque depth (tileFarPlane).
// The result is never smaller than g_fClustBase.
float SuggestLogBase50(float tileFarPlane)
{
    const float numClusters = (float)(1 << g_iLog2NumClusters);

    // tile depth as a fraction of the near-to-far range, kept strictly positive
    const float depthFraction = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);

    const float discriminant = max(0.0, 1.0 - 4.0 * depthFraction * (1.0 - depthFraction));
    const float root = (1.0 + sqrt(discriminant)) / (2.0 * depthFraction);
    const float candidateBase = pow(root, 2.0 / numClusters);

    return max(g_fClustBase, candidateBase);
}
80+
81+
// Produces a log base such that (approximately) a quarter of the clusters are spent
// between the near plane and the tile's maximum opaque depth (tileFarPlane).
// The result is never smaller than g_fClustBase.
float SuggestLogBase25(float tileFarPlane)
{
    const float numClusters = (float)(1 << g_iLog2NumClusters);

    // tile depth as a fraction of the near-to-far range, kept strictly positive
    const float depthFraction = clamp((tileFarPlane - g_fNearPlane) / (g_fFarPlane - g_fNearPlane), FLT_EPSILON, 1.0);

    // approximate inverse of d*x^4 + (-x) + (1-d) = 0, where d is the normalized distance
    const float approxRoot = (1 / 2.3) * max(0.0, (0.8 / depthFraction) - 1);
    const float candidateBase = pow(approxRoot, 4.0 / (numClusters * 2));

    return max(g_fClustBase, candidateBase);
}
89+
90+
#endif

Assets/ScriptableRenderLoop/HDRenderLoop/Lighting/TilePass/ClusteredUtils.hlsl.meta

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
#pragma kernel BigTileLightListGen
2+
3+
#include "../TilePass.cs.hlsl"
4+
#include "../LightingConvexHullUtils.hlsl"
5+
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
6+
#include "../SortingComputeUtils.hlsl"
7+
#endif
8+
9+
// Optional culling refinements applied after the coarse AABB pass.
#define EXACT_EDGE_TESTS
#define PERFORM_SPHERICAL_INTERSECTION_TESTS

// Thread-group size of the kernel.
#define NR_THREADS 64

// Light capacity of one big tile; one slot of the PLUSONE budget holds the light count.
#define MAX_NR_BIGTILE_LIGHTS (MAX_NR_BIGTILE_LIGHTS_PLUSONE-1)

// Per-dispatch constants.
uniform int      g_iNrVisibLights;      // number of visible lights to test
uniform uint2    g_viDimensions;        // x = width, y = height in pixels
uniform float4x4 g_mInvScrProjection;   // used to unproject depth-buffer values
uniform float4x4 g_mScrProjection;      // screen projection; scale and centre terms are read from rows 0 and 1
uniform float    g_fNearPlane;
uniform float    g_fFarPlane;

// Inputs. g_vBoundsBuffer stores the AABB minimum of light l at [l] and its maximum at [l + g_iNrVisibLights].
StructuredBuffer<float3> g_vBoundsBuffer : register( t1 );
StructuredBuffer<SFiniteLightData> g_vLightData : register( t2 );
StructuredBuffer<SFiniteLightBound> g_data : register( t3 );

// Output: MAX_NR_BIGTILE_LIGHTS_PLUSONE entries per big tile.
RWBuffer<uint> g_vLightList : register( u0 );

// Group-shared scratch list and its running entry count.
// 2kB (room for roughly 30 wavefronts)
groupshared uint lightsListLDS[MAX_NR_BIGTILE_LIGHTS_PLUSONE];
groupshared uint lightOffs;
36+
37+
38+
// Converts a depth-buffer value (0 is near, 1 is far) into linear depth by
// unprojecting the point (0, 0, zDptBufSpace) with g_mInvScrProjection.
float GetLinearDepth(float zDptBufSpace)
{
    const float4 unprojected = mul(g_mInvScrProjection, float4(0.0f, 0.0f, zDptBufSpace, 1.0));
    return unprojected.z / unprojected.w;
}
44+
45+
46+
// Reconstructs a view-space position from a screen position and a linear depth,
// using the scale and centre terms of g_mScrProjection.
float3 GetViewPosFromLinDepth(float2 v2ScrPos, float fLinDepth)
{
    const float scaleX = g_mScrProjection[0].x;
    const float centerX = g_mScrProjection[0].z;
    const float scaleY = g_mScrProjection[1].y;
    const float centerY = g_mScrProjection[1].z;

    // position at depth one; the sign convention depends on camera-space handedness
#if USE_LEFTHAND_CAMERASPACE
    const float3 posAtDepthOne = float3( ((v2ScrPos.x-centerX)/scaleX), ((v2ScrPos.y-centerY)/scaleY), 1.0 );
#else
    const float3 posAtDepthOne = float3( -((v2ScrPos.x+centerX)/scaleX), -((v2ScrPos.y+centerY)/scaleY), 1.0 );
#endif

    return fLinDepth*posAtDepthOne;
}
59+
60+
// Length of the diagonal of one pixel at depth one, computed from the inverse
// x and y scale terms of g_mScrProjection.
float GetOnePixDiagWorldDistAtDepthOne()
{
    const float2 pixelExtent = float2(1.0/g_mScrProjection[0].x, 1.0/g_mScrProjection[1].y);
    return length(pixelExtent);
}
67+
68+
69+
// Forward declarations of the optional culling passes; each is only declared
// when its feature macro is defined.

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
// Culls coarse-list entries by a sphere-versus-tile test.
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate);
#endif

#ifdef EXACT_EDGE_TESTS
// Culls coarse-list entries by searching for a separating plane between light hull and tile frustum.
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR);
#endif
76+
77+
78+
79+
80+
// Builds the light list of one 64x64-pixel "big tile"; one thread group handles one tile.
//   threadID   index of the thread inside the group (0 .. NR_THREADS-1).
//   u3GroupID  xy selects the big tile.
//
// Steps:
//   1. coarse pass: every light whose screen-space AABB overlaps the tile is appended to lightsListLDS,
//   2. optional refinement passes overwrite rejected entries with 0xffffffff,
//   3. the list is sorted and the surviving entries (index < g_iNrVisibLights) are counted,
//   4. the count followed by the surviving light indices is written to g_vLightList at
//      MAX_NR_BIGTILE_LIGHTS_PLUSONE * tileOffset.
[numthreads(NR_THREADS, 1, 1)]
void BigTileLightListGen(uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
    uint2 tileIDX = u3GroupID.xy;
    uint t=threadID;

    uint iWidth = g_viDimensions.x;
    uint iHeight = g_viDimensions.y;
    uint nrBigTilesX = (iWidth+63)/64;

    if(t==0) lightOffs = 0;

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    uint2 viTilLL = 64*tileIDX;
    uint2 viTilUR = min( viTilLL+uint2(64,64), uint2(iWidth, iHeight) );       // not width and height minus 1 since viTilUR represents the end of the tile corner.

    float2 vTileLL = float2(viTilLL.x/(float) iWidth, viTilLL.y/(float) iHeight);
    float2 vTileUR = float2(viTilUR.x/(float) iWidth, viTilUR.y/(float) iHeight);

    // build coarse list using AABB
    for(int l=(int) t; l<(int) g_iNrVisibLights; l += NR_THREADS)
    {
        const float2 vMi = g_vBoundsBuffer[l].xy;
        const float2 vMa = g_vBoundsBuffer[l+g_iNrVisibLights].xy;

        if( all(vMa>vTileLL) && all(vMi<vTileUR))
        {
            unsigned int uInc = 1;
            unsigned int uIndex;
            InterlockedAdd(lightOffs, uInc, uIndex);
            if(uIndex<MAX_NR_BIGTILE_LIGHTS) lightsListLDS[uIndex] = l;     // add to light list
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif

    // lightOffs may exceed the capacity when more lights overlap than fit; only the stored ones count.
    int iNrCoarseLights = min(lightOffs,MAX_NR_BIGTILE_LIGHTS);

#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
    SphericalIntersectionTests( t, iNrCoarseLights, float2(min(viTilLL.xy+uint2(64/2,64/2), uint2(iWidth-1, iHeight-1))) );
#endif

#ifdef EXACT_EDGE_TESTS
    CullByExactEdgeTests(t, iNrCoarseLights, viTilLL.xy, viTilUR.xy);
#endif

    // sort lights
    SORTLIST(lightsListLDS, iNrCoarseLights, MAX_NR_BIGTILE_LIGHTS_PLUSONE, t, NR_THREADS);

    // Count the entries that survived culling. Only one thread resets the shared counter,
    // mirroring the reset at the top of the kernel.
    if(t==0) lightOffs = 0;
    GroupMemoryBarrierWithGroupSync();

    int i;      // declared once: shared by the two loops below
    for(i=(int) t; i<iNrCoarseLights; i+=NR_THREADS)
        if(lightsListLDS[i]<(uint) g_iNrVisibLights) InterlockedAdd(lightOffs, 1);
    GroupMemoryBarrierWithGroupSync();
    iNrCoarseLights = lightOffs;

    int offs = tileIDX.y*nrBigTilesX + tileIDX.x;

    // Slot 0 of the tile's range holds the light count, slots 1..count hold the light indices.
    // The slot is selected by the element index i (not the thread index): thread 0 also
    // writes slots NR_THREADS, 2*NR_THREADS, ... and those must receive light indices.
    // max() keeps the unselected operand of the conditional from indexing slot -1 when i is 0.
    for(i=(int) t; i<(iNrCoarseLights+1); i+=NR_THREADS)
        g_vLightList[MAX_NR_BIGTILE_LIGHTS_PLUSONE*offs + i] = i==0 ? iNrCoarseLights : lightsListLDS[max(i-1, 0)];
}
148+
149+
150+
#ifdef PERFORM_SPHERICAL_INTERSECTION_TESTS
151+
// Rejects coarse-list lights whose bounding sphere does not overlap the tile.
//   threadID          thread index inside the group; entries are strided by NR_THREADS.
//   iNrCoarseLights   number of entries in lightsListLDS to test.
//   screenCoordinate  screen position the tile test is centred on.
// Rejected entries are overwritten with 0xffffffff.
void SphericalIntersectionTests(uint threadID, int iNrCoarseLights, float2 screenCoordinate)
{
    // view-space position of the screen coordinate at unit distance in front of the camera
#if USE_LEFTHAND_CAMERASPACE
    const float unitDepth = 1.0;
#else
    const float unitDepth = -1.0;
#endif
    float3 tileDir = GetViewPosFromLinDepth( screenCoordinate, unitDepth);

    float tileHalfExtentAtDepthOne = 32*GetOnePixDiagWorldDistAtDepthOne();     // scale by half a tile

    for(int slot=threadID; slot<iNrCoarseLights; slot+=NR_THREADS)
    {
        SFiniteLightBound bound = g_data[lightsListLDS[slot]];

        bool bOverlaps = DoesSphereOverlapTile(tileDir, tileHalfExtentAtDepthOne, bound.center.xyz, bound.radius);
        if( !bOverlaps )
        {
            lightsListLDS[slot]=0xffffffff;
        }
    }

#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
174+
#endif
175+
176+
177+
178+
179+
180+
181+
182+
#ifdef EXACT_EDGE_TESTS
183+
// Returns view-space corner i (0..7) of the tile frustum.
//   bit 0 of i selects x from viTilLL (clear) or viTilUR (set),
//   bit 1 of i selects y the same way,
//   bit 2 of i selects the depth: g_fNearPlane (clear) or fTileFarPlane (set).
float3 GetTileVertex(uint2 viTilLL, uint2 viTilUR, int i, float fTileFarPlane)
{
    float cornerX = (i&1)!=0 ? viTilUR.x : viTilLL.x;
    float cornerY = (i&2)!=0 ? viTilUR.y : viTilLL.y;
    float cornerZ = (i&4)!=0 ? fTileFarPlane : g_fNearPlane;

#if !USE_LEFTHAND_CAMERASPACE
    cornerZ = -cornerZ;
#endif

    return GetViewPosFromLinDepth( float2(cornerX, cornerY), cornerZ);
}
193+
194+
// Returns a point (vP0) and a direction (vE0) for tile-frustum edge e0.
// e0 >> 2 is the section (0 = side edges, 1 = near edges, 2 = far edges) and
// e0 & 3 selects the edge within the section.
void GetFrustEdge(out float3 vP0, out float3 vE0, const int e0, uint2 viTilLL, uint2 viTilUR, float fTileFarPlane)
{
    const int section = e0>>2;
    const int swizzle = e0&0x3;

    // vertex index: offset by 4 at section 2
    const int vertexIdx = swizzle + (2*(section&0x2));
    vP0 = GetTileVertex(uint2(viTilLL.x, viTilUR.y), uint2(viTilUR.x, viTilLL.y), vertexIdx, fTileFarPlane);

    if(section==0)
    {
        // side edge: the direction is the vertex position itself
        vE0 = vP0;
    }
    else
    {
        // near/far edge: a signed unit x or y axis chosen by the swizzle bits
        const float edgeSign = (swizzle&0x2)==0 ? 1.0f : (-1.0f);
        const float3 edgeAxis = (swizzle&0x1)==(swizzle>>1) ? float3(1,0,0) : float3(0,1,0);
        vE0 = edgeSign*edgeAxis;
    }
}
203+
204+
// Rejects coarse-list lights for which a separating plane exists between the light's
// hull and the tile frustum. Candidate planes are spanned by one hull edge and one
// frustum edge; the edge pairs of a light are distributed over the threads of the group.
//   threadID          thread index inside the group.
//   iNrCoarseLights   number of entries in lightsListLDS to test.
//   viTilLL, viTilUR  pixel corners of the tile.
// Rejected entries are overwritten with 0xffffffff.
void CullByExactEdgeTests(uint threadID, int iNrCoarseLights, uint2 viTilLL, uint2 viTilUR)
{
    const bool bOnlyNeedFrustumSideEdges = true;
    const int nrFrustEdges = bOnlyNeedFrustumSideEdges ? 4 : 8;    // max 8 since we never need to test 4 far edges of frustum since they are identical vectors to near edges and plane is placed at vP0 on light hull.

    const int totNrEdgePairs = 12*nrFrustEdges;
    for(int slot=0; slot<iNrCoarseLights; slot++)
    {
        const int idxCoarse = lightsListLDS[slot];
        [branch]if(idxCoarse<(uint) g_iNrVisibLights && g_vLightData[idxCoarse].lightType!=SPHERE_LIGHT)       // don't bother doing edge tests for sphere lights since these have camera aligned bboxes.
        {
            SFiniteLightBound bound = g_data[idxCoarse];

            const float3 boxX = bound.boxAxisX.xyz;
            const float3 boxY = bound.boxAxisY.xyz;
            const float3 boxZ = -bound.boxAxisZ.xyz;       // flip axis (so it points away from the light direction for a spot-light)
            const float3 center = bound.center.xyz;
            const float2 scaleXY = bound.scaleXY;

            for(int pairIdx=threadID; pairIdx<totNrEdgePairs; pairIdx+=NR_THREADS)
            {
                // split the pair index into a hull edge and a frustum edge
                int hullEdge = (int) (((uint)pairIdx)/((uint) nrFrustEdges));      // should become a shift right
                int frustEdge = pairIdx - hullEdge*nrFrustEdges;

                int idx_cur=0, idx_twin=0;
                float3 hullEdgePos, hullEdgeDir;
                GetHullEdge(idx_cur, idx_twin, hullEdgePos, hullEdgeDir, hullEdge, boxX, boxY, boxZ, center, scaleXY);

                float3 frustEdgePos, frustEdgeDir;
                GetFrustEdge(frustEdgePos, frustEdgeDir, frustEdge, viTilLL, viTilUR, g_fFarPlane);

                // potential separation plane
                float3 planeNormal = cross(hullEdgeDir, frustEdgeDir);

                // side of the plane occupied by the hull: +1, -1, or 0 when straddling / all on the plane
                int hullPositive=0, hullNegative=0;
                for(int k=1; k<8; k++)      // only need to test 7 verts (technically just 6).
                {
                    int hullVertIdx = (idx_cur+k)&0x7;
                    float3 hullVert = GetHullVertex(boxX, boxY, boxZ, center, scaleXY, hullVertIdx);
                    float hullDist = idx_twin==hullVertIdx ? 0.0 : dot(planeNormal, hullVert-hullEdgePos);
                    if(hullDist>0) ++hullPositive; else if(hullDist<0) ++hullNegative;
                }
                int hullSide = 0;
                if(hullPositive>0 && hullNegative==0) hullSide = 1;
                else if(hullNegative>0 && hullPositive==0) hullSide = -1;

                // side of the plane occupied by the tile frustum, same encoding
                int frustPositive=0, frustNegative=0;
                for(int corner=0; corner<8; corner++)
                {
                    float3 frustVert = GetTileVertex(viTilLL, viTilUR, corner, g_fFarPlane);
                    float frustDist = dot(planeNormal, frustVert-hullEdgePos);
                    if(frustDist>0) ++frustPositive; else if(frustDist<0) ++frustNegative;
                }
                int frustSide = 0;
                if(frustPositive>0 && frustNegative==0) frustSide = 1;
                else if(frustNegative>0 && frustPositive==0) frustSide = -1;

                // strictly opposite sides means the plane separates hull and frustum
                bool bFoundSepPlane = (hullSide*frustSide)<0;
                if(bFoundSepPlane) lightsListLDS[slot]=0xffffffff;
            }
        }
    }
#if !defined(SHADER_API_XBOXONE) && !defined(SHADER_API_PSSL)
    GroupMemoryBarrierWithGroupSync();
#endif
}
267+
#endif

0 commit comments

Comments
 (0)