Merge pull request #12 from EvgeniiG/master

sebastienlagarde · web-flow · commit e6ff29d972c4 · 2016-11-17T14:21:54.000+01:00
Use a new impl. of FastACos for area lights and fix the reference IBL impl.
diff --git a/Assets/ScriptableRenderLoop/HDRenderLoop/Material/Lit/Lit.hlsl b/Assets/ScriptableRenderLoop/HDRenderLoop/Material/Lit/Lit.hlsl
@@ -506,8 +506,7 @@ PreLightData GetPreLightData(float3 V, float3 positionWS, Coordinate coord, BSDF
 
     // Area light specific
     // UVs for sampling the LUTs
-    // TODO: Test with fastAcos
-    float theta = acos(dot(bsdfData.normalWS, V));
+    float theta = FastACos(dot(bsdfData.normalWS, V));
     // Scale and bias for the current precomputed table - the constants used here are the ones that were used when the tables in LtcData.DisneyDiffuse.cs and LtcData.GGX.cs were created
     float2 uv = 0.0078125 + 0.984375 * float2(bsdfData.perceptualRoughness, theta * INV_HALF_PI);
 
@@ -872,7 +871,6 @@ void EvaluateBSDF_Area( LightLoopContext lightLoopContext,
         }
 
     #ifndef DIFFUSE_LAMBERT_BRDF
-        // TODO: verify that we do not need to multiply by PI.
         ltcValue *= preLightData.ltcDisneyDiffuseMagnitude;
     #endif
 
@@ -889,9 +887,6 @@ void EvaluateBSDF_Area( LightLoopContext lightLoopContext,
         ltcValue *= lightData.specularScale;
         specularLighting = fresnelTerm * lightData.color * ltcValue;
     }
-
-    // TODO: current area light code doesn't take into account artist attenuation radius!
-
 #endif
 }
 
@@ -905,13 +900,13 @@ float3 IntegrateLambertIBLRef(  LightLoopContext lightLoopContext,
                                 uint sampleCount = 2048)
 {
     float3 N        = bsdfData.normalWS;
+    float3 tangentX = bsdfData.tangentWS;
+    float3 tangentY = bsdfData.bitangentWS;
     float3 acc      = float3(0.0, 0.0, 0.0);
+
     // Add some jittering on Hammersley2d
     float2 randNum  = InitRandom(N.xy * 0.5 + 0.5);
 
-    float3 tangentX, tangentY;
-    GetLocalFrame(N, tangentX, tangentY);
-
     for (uint i = 0; i < sampleCount; ++i)
     {
         float2 u    = Hammersley2d(i, sampleCount);
@@ -938,15 +933,15 @@ float3 IntegrateDisneyDiffuseIBLRef(LightLoopContext lightLoopContext,
                                     float3 V, EnvLightData lightData, BSDFData bsdfData,
                                     uint sampleCount = 2048)
 {
-    float3 N = bsdfData.normalWS;
-    float NdotV = dot(N, V);
-    float3 acc  = float3(0.0, 0.0, 0.0);
+    float3 N        = bsdfData.normalWS;
+    float3 tangentX = bsdfData.tangentWS;
+    float3 tangentY = bsdfData.bitangentWS;
+    float  NdotV    = saturate(dot(N, V));
+    float3 acc      = float3(0.0, 0.0, 0.0);
+
     // Add some jittering on Hammersley2d
     float2 randNum  = InitRandom(N.xy * 0.5 + 0.5);
 
-    float3 tangentX, tangentY;
-    GetLocalFrame(N, tangentX, tangentY);
-
     for (uint i = 0; i < sampleCount; ++i)
     {
         float2 u    = Hammersley2d(i, sampleCount);
@@ -981,15 +976,14 @@ float3 IntegrateSpecularGGXIBLRef(  LightLoopContext lightLoopContext,
                                     uint sampleCount = 2048)
 {
     float3 N        = bsdfData.normalWS;
-    float NdotV     = saturate(dot(N, V));
+    float3 tangentX = bsdfData.tangentWS;
+    float3 tangentY = bsdfData.bitangentWS;
+    float  NdotV    = saturate(dot(N, V));
     float3 acc      = float3(0.0, 0.0, 0.0);
 
     // Add some jittering on Hammersley2d
     float2 randNum  = InitRandom(V.xy * 0.5 + 0.5);
 
-    float3 tangentX, tangentY;
-    GetLocalFrame(N, tangentX, tangentY);
-
     for (uint i = 0; i < sampleCount; ++i)
     {
         float2 u    = Hammersley2d(i, sampleCount);
@@ -1047,7 +1041,7 @@ void EvaluateBSDF_Env(  LightLoopContext lightLoopContext,
 */
     diffuseLighting = float3(0.0, 0.0, 0.0);
 
-    weight = float2(0.0, 0.0);
+    weight = float2(0.0, 1.0);
 
 #else
     // TODO: factor this code in common, so other material authoring don't require to rewrite everything, 
diff --git a/Assets/ScriptableRenderLoop/ShaderLibrary/AreaLighting.hlsl b/Assets/ScriptableRenderLoop/ShaderLibrary/AreaLighting.hlsl
@@ -4,12 +4,10 @@
 float IntegrateEdge(float3 v1, float3 v2)
 {
     float cosTheta = dot(v1, v2);
-    // TODO: Explain the 0.9999 <= precision is important!
-    cosTheta = Clamp(cosTheta, -0.9999, 0.9999);
-
-    // TODO: Experiment with fastAcos
-    float theta = acos(cosTheta);
-    float res = cross(v1, v2).z * theta / sin(theta);
+    // Clamp to avoid artifacts. This particular constant gives the best results.
+    cosTheta    = Clamp(cosTheta, -0.9999, 0.9999);
+    float theta = FastACos(cosTheta);
+    float res   = cross(v1, v2).z * theta / sin(theta);
 
     return res;
 }
diff --git a/Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl b/Assets/ScriptableRenderLoop/ShaderLibrary/Common.hlsl
@@ -195,13 +195,15 @@ void GetCubeFaceID(float3 dir, out int faceIndex)
 
 #define MERGE_NAME(X, Y) X##Y
 
+// Acos in 14 cycles.
 // Ref: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
 float FastACos(float inX)
 {
     float x = abs(inX);
-    float res = -0.156583 * x + HALF_PI;
-    res *= sqrt(1.0 - x);
-    return (inX >= 0) ? res : PI - res;
+    float res = (0.0468878 * x + -0.203471) * x + 1.570796; // p(x)
+    res *= sqrt(1.0f - x);
+
+    return (inX >= 0) ? res : PI - res; // Undo range reduction
 }
 
 // Same cost as Acos + 1 FR
diff --git a/Assets/ScriptableRenderLoop/ShaderLibrary/CommonLighting.hlsl b/Assets/ScriptableRenderLoop/ShaderLibrary/CommonLighting.hlsl
@@ -123,15 +123,13 @@ float3 MapCubeToSphere(float3 v, float r2)
     return v * sqrt((float3)r2 - 0.5 * v2.yzx - 0.5 * v2.zxy + vr3.yxx * v2.zzy);
 }
 
-// Computes the squared magnitude of the vector 'v' after mapping it
-// to a vector within the sphere of radius 'r', where r = sqrt(r2).
-// The vector is originally defined within the cube of dimensions [-r, r]^3.
-// The mapping is performed as per MapCubeToSphere().
-// 'dotV' is dot(v, v) (often calculated when calling such a function)
-float ComputeCubeToSphereMapSqMagnitude(float3 v, float dotV, float r2)
+// Computes the squared magnitude of the vector computed by MapCubeToSphere().
+float ComputeCubeToSphereMapSqMagnitude(float3 v, float r2)
 {
     float3 v2 = v * v;
-    return r2 * dotV - v2.x * v2.y - v2.y * v2.z - v2.z * v2.x + v2.x * v2.y * v2.z * rcp(r2);
+    // Note: dot(v, v) is often computed before this function is called,
+    // so the compiler should optimize and use the precomputed result here.
+    return r2 * dot(v, v) - v2.x * v2.y - v2.y * v2.z - v2.z * v2.x + v2.x * v2.y * v2.z * rcp(r2);
 }
 
 #endif // UNITY_COMMON_LIGHTING_INCLUDED

Original file line number	Diff line number	Diff line change
`@@ -123,15 +123,13 @@ float3 MapCubeToSphere(float3 v, float r2)`
`123`	`123`	`return v * sqrt((float3)r2 - 0.5 * v2.yzx - 0.5 * v2.zxy + vr3.yxx * v2.zzy);`
`124`	`124`	`}`
`125`	`125`
`126`		`-// Computes the squared magnitude of the vector 'v' after mapping it`
`127`		`-// to a vector within the sphere of radius 'r', where r = sqrt(r2).`
`128`		`-// The vector is originally defined within the cube of dimensions [-r, r]^3.`
`129`		`-// The mapping is performed as per MapCubeToSphere().`
`130`		`-// 'dotV' is dot(v, v) (often calculated when calling such a function)`
`131`		`-float ComputeCubeToSphereMapSqMagnitude(float3 v, float dotV, float r2)`
	`126`	`+// Computes the squared magnitude of the vector computed by MapCubeToSphere().`
	`127`	`+float ComputeCubeToSphereMapSqMagnitude(float3 v, float r2)`
`132`	`128`	`{`
`133`	`129`	`float3 v2 = v * v;`
`134`		`- return r2 * dotV - v2.x * v2.y - v2.y * v2.z - v2.z * v2.x + v2.x * v2.y * v2.z * rcp(r2);`
	`130`	`+ // Note: dot(v, v) is often computed before this function is called,`
	`131`	`+ // so the compiler should optimize and use the precomputed result here.`
	`132`	`+ return r2 * dot(v, v) - v2.x * v2.y - v2.y * v2.z - v2.z * v2.x + v2.x * v2.y * v2.z * rcp(r2);`
`135`	`133`	`}`
`136`	`134`
`137`	`135`	`#endif // UNITY_COMMON_LIGHTING_INCLUDED`