Skip to content

Commit e6ff29d

Browse files
Merge pull request #12 from EvgeniiG/master
Use a new impl. of FastACos for area lights and fix the reference IBL impl.
2 parents 706fa80 + 77dccff commit e6ff29d

File tree

4 files changed

+28
-36
lines changed

4 files changed

+28
-36
lines changed

Assets/ScriptableRenderLoop/HDRenderLoop/Material/Lit/Lit.hlsl

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -506,8 +506,7 @@ PreLightData GetPreLightData(float3 V, float3 positionWS, Coordinate coord, BSDF
506506

507507
// Area light specific
508508
// UVs for sampling the LUTs
509-
// TODO: Test with fastAcos
510-
float theta = acos(dot(bsdfData.normalWS, V));
509+
float theta = FastACos(dot(bsdfData.normalWS, V));
511510
// Scale and bias for the current precomputed table - the constants used here are the same ones that were used for the tables in LtcData.DisneyDiffuse.cs and LtcData.GGX.cs
512511
float2 uv = 0.0078125 + 0.984375 * float2(bsdfData.perceptualRoughness, theta * INV_HALF_PI);
513512

@@ -872,7 +871,6 @@ void EvaluateBSDF_Area( LightLoopContext lightLoopContext,
872871
}
873872

874873
#ifndef DIFFUSE_LAMBERT_BRDF
875-
// TODO: verify that we do not need to multiply by PI.
876874
ltcValue *= preLightData.ltcDisneyDiffuseMagnitude;
877875
#endif
878876

@@ -889,9 +887,6 @@ void EvaluateBSDF_Area( LightLoopContext lightLoopContext,
889887
ltcValue *= lightData.specularScale;
890888
specularLighting = fresnelTerm * lightData.color * ltcValue;
891889
}
892-
893-
// TODO: current area light code doesn't take into account artist attenuation radius!
894-
895890
#endif
896891
}
897892

@@ -905,13 +900,13 @@ float3 IntegrateLambertIBLRef( LightLoopContext lightLoopContext,
905900
uint sampleCount = 2048)
906901
{
907902
float3 N = bsdfData.normalWS;
903+
float3 tangentX = bsdfData.tangentWS;
904+
float3 tangentY = bsdfData.bitangentWS;
908905
float3 acc = float3(0.0, 0.0, 0.0);
906+
909907
// Add some jittering on Hammersley2d
910908
float2 randNum = InitRandom(N.xy * 0.5 + 0.5);
911909

912-
float3 tangentX, tangentY;
913-
GetLocalFrame(N, tangentX, tangentY);
914-
915910
for (uint i = 0; i < sampleCount; ++i)
916911
{
917912
float2 u = Hammersley2d(i, sampleCount);
@@ -938,15 +933,15 @@ float3 IntegrateDisneyDiffuseIBLRef(LightLoopContext lightLoopContext,
938933
float3 V, EnvLightData lightData, BSDFData bsdfData,
939934
uint sampleCount = 2048)
940935
{
941-
float3 N = bsdfData.normalWS;
942-
float NdotV = dot(N, V);
943-
float3 acc = float3(0.0, 0.0, 0.0);
936+
float3 N = bsdfData.normalWS;
937+
float3 tangentX = bsdfData.tangentWS;
938+
float3 tangentY = bsdfData.bitangentWS;
939+
float NdotV = saturate(dot(N, V));
940+
float3 acc = float3(0.0, 0.0, 0.0);
941+
944942
// Add some jittering on Hammersley2d
945943
float2 randNum = InitRandom(N.xy * 0.5 + 0.5);
946944

947-
float3 tangentX, tangentY;
948-
GetLocalFrame(N, tangentX, tangentY);
949-
950945
for (uint i = 0; i < sampleCount; ++i)
951946
{
952947
float2 u = Hammersley2d(i, sampleCount);
@@ -981,15 +976,14 @@ float3 IntegrateSpecularGGXIBLRef( LightLoopContext lightLoopContext,
981976
uint sampleCount = 2048)
982977
{
983978
float3 N = bsdfData.normalWS;
984-
float NdotV = saturate(dot(N, V));
979+
float3 tangentX = bsdfData.tangentWS;
980+
float3 tangentY = bsdfData.bitangentWS;
981+
float NdotV = saturate(dot(N, V));
985982
float3 acc = float3(0.0, 0.0, 0.0);
986983

987984
// Add some jittering on Hammersley2d
988985
float2 randNum = InitRandom(V.xy * 0.5 + 0.5);
989986

990-
float3 tangentX, tangentY;
991-
GetLocalFrame(N, tangentX, tangentY);
992-
993987
for (uint i = 0; i < sampleCount; ++i)
994988
{
995989
float2 u = Hammersley2d(i, sampleCount);
@@ -1047,7 +1041,7 @@ void EvaluateBSDF_Env( LightLoopContext lightLoopContext,
10471041
*/
10481042
diffuseLighting = float3(0.0, 0.0, 0.0);
10491043

1050-
weight = float2(0.0, 0.0);
1044+
weight = float2(0.0, 1.0);
10511045

10521046
#else
10531047
// TODO: factor this code in common, so other material authoring don't require to rewrite everything,

Assets/ScriptableRenderLoop/ShaderLibrary/AreaLighting.hlsl

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44
float IntegrateEdge(float3 v1, float3 v2)
55
{
66
float cosTheta = dot(v1, v2);
7-
// TODO: Explain the 0.9999 <= precision is important!
8-
cosTheta = Clamp(cosTheta, -0.9999, 0.9999);
9-
10-
// TODO: Experiment with fastAcos
11-
float theta = acos(cosTheta);
12-
float res = cross(v1, v2).z * theta / sin(theta);
7+
// Clamp to avoid artifacts. This particular constant gives the best results.
8+
cosTheta = Clamp(cosTheta, -0.9999, 0.9999);
9+
float theta = FastACos(cosTheta);
10+
float res = cross(v1, v2).z * theta / sin(theta);
1311

1412
return res;
1513
}

Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,15 @@ void GetCubeFaceID(float3 dir, out int faceIndex)
195195

196196
#define MERGE_NAME(X, Y) X##Y
197197

198+
// Acos in 14 cycles.
198199
// Ref: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
199200
float FastACos(float inX)
200201
{
201202
float x = abs(inX);
202-
float res = -0.156583 * x + HALF_PI;
203-
res *= sqrt(1.0 - x);
204-
return (inX >= 0) ? res : PI - res;
203+
float res = (0.0468878 * x + -0.203471) * x + 1.570796; // p(x)
204+
res *= sqrt(1.0f - x);
205+
206+
return (inX >= 0) ? res : PI - res; // Undo range reduction
205207
}
206208

207209
// Same cost as Acos + 1 FR

Assets/ScriptableRenderLoop/ShaderLibrary/CommonLighting.hlsl

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,15 +123,13 @@ float3 MapCubeToSphere(float3 v, float r2)
123123
return v * sqrt((float3)r2 - 0.5 * v2.yzx - 0.5 * v2.zxy + vr3.yxx * v2.zzy);
124124
}
125125

126-
// Computes the squared magnitude of the vector 'v' after mapping it
127-
// to a vector within the sphere of radius 'r', where r = sqrt(r2).
128-
// The vector is originally defined within the cube of dimensions [-r, r]^3.
129-
// The mapping is performed as per MapCubeToSphere().
130-
// 'dotV' is dot(v, v) (often calculated when calling such a function)
131-
float ComputeCubeToSphereMapSqMagnitude(float3 v, float dotV, float r2)
126+
// Computes the squared magnitude of the vector computed by MapCubeToSphere().
127+
float ComputeCubeToSphereMapSqMagnitude(float3 v, float r2)
132128
{
133129
float3 v2 = v * v;
134-
return r2 * dotV - v2.x * v2.y - v2.y * v2.z - v2.z * v2.x + v2.x * v2.y * v2.z * rcp(r2);
130+
// Note: dot(v, v) is often computed before this function is called,
131+
// so the compiler should optimize and use the precomputed result here.
132+
return r2 * dot(v, v) - v2.x * v2.y - v2.y * v2.z - v2.z * v2.x + v2.x * v2.y * v2.z * rcp(r2);
135133
}
136134

137135
#endif // UNITY_COMMON_LIGHTING_INCLUDED

0 commit comments

Comments
 (0)