Skip to content

Commit 4075ab6

Browse files
committed
Refactored ef64 sqrt algorithm
1 parent 027662d commit 4075ab6

File tree

1 file changed

+32
-19
lines changed

1 file changed

+32
-19
lines changed

include/nbl/builtin/hlsl/emulated/float64_t.hlsl

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -395,44 +395,57 @@ namespace hlsl
395395
return bit_cast<this_t>(data ^ ieee754::traits<float64_t>::signMask);
396396
}
397397

398+
/**
399+
* @brief Computes sqare root estimation.
400+
*
401+
* Can be less precise when FastMath is disabled.
402+
* sqrt(inf) = inf
403+
* sqrt(-0) = -0
404+
* sqrt(NaN) = NaN
405+
*/
398406
static this_t sqrt(this_t number)
399407
{
400-
// so it doesn't return NaN for -0.0
401408
bool isZero = !(number.data & 0x7FFFFFFFFFFFFFFFull);
402409
if (isZero)
403410
return number;
404411

405-
bool isNegative = (number.data >> 63) > 0;
406-
if (isNegative)
407-
return bit_cast<this_t>(ieee754::traits<this_t>::quietNaN);
408-
409-
if(!FastMath)
412+
static const uint64_t MaxFloat64AsUint64 = 0x7FEFFFFFFFFFFFFFull;
413+
if (number.data > MaxFloat64AsUint64)
410414
{
411415
bool isInf = cpp_compat_intrinsics_impl::isinf_uint_impl(number.data);
412416
if (isInf)
413417
return number;
414-
}
415418

416-
// find square root initial guess using the fast inverse square root algorithm
417-
nbl::hlsl::emulated_float64_t<true, true> invSquareRoot = number;
418-
{
419-
int64_t i = 0x5fe6eb50c7b537a9ull - (number.data >> 1);
420-
invSquareRoot.data = i;
421-
422-
nbl::hlsl::emulated_float64_t<true, true> threeHalfs = emulated_float64_t<true, true>::create(1.5);
423-
nbl::hlsl::emulated_float64_t<true, true> x2 = number * emulated_float64_t<true, true>::create(0.5);
424-
invSquareRoot = invSquareRoot * (threeHalfs - (x2 * invSquareRoot * invSquareRoot));
419+
// when (number.data > MaxFloat64AsUint64) and is not infinity, we can be sure that number is either NaN or negative
420+
return bit_cast<this_t>(ieee754::traits<this_t>::quietNaN);
425421
}
426422

423+
const float f32InverseSquareRoot = nbl::hlsl::rsqrt(_static_cast<float>(number));
424+
427425
// find sqrt approximation using the Newton-Raphson method
428-
nbl::hlsl::emulated_float64_t<true, true> squareRoot = nbl::hlsl::emulated_float64_t<true, true>::create(1.0) / invSquareRoot;
426+
this_t inverseSquareRoot = _static_cast<this_t>(f32InverseSquareRoot);
429427
const int Iterations = 5;
428+
static const this_t Half = this_t::create(0.5f);
429+
static const this_t ThreeHalfs = this_t::create(1.5f);
430+
const this_t x2 = number * Half;
431+
[[unroll]]
430432
for (int i = 0; i < Iterations; ++i)
431433
{
432-
squareRoot = nbl::hlsl::emulated_float64_t<true, true>::create(0.5) * (squareRoot + number / squareRoot);
434+
inverseSquareRoot = inverseSquareRoot * (ThreeHalfs - (x2 * inverseSquareRoot * inverseSquareRoot));
433435
}
434436

435-
return squareRoot;
437+
if (FastMath)
438+
{
439+
return this_t::create(1.0f) / inverseSquareRoot;
440+
}
441+
else
442+
{
443+
// 2 Newton-Raphson iterations to increase precision
444+
this_t squareRoot = this_t::create(1.0f) / inverseSquareRoot;
445+
squareRoot = Half * (squareRoot + number / squareRoot);
446+
squareRoot = Half * (squareRoot + number / squareRoot);
447+
return squareRoot;
448+
}
436449
}
437450

438451
NBL_CONSTEXPR_STATIC bool isFastMathSupported = FastMath;

0 commit comments

Comments
 (0)