@@ -395,6 +395,59 @@ namespace hlsl
395
395
return bit_cast<this_t>(data ^ ieee754::traits<float64_t>::signMask);
396
396
}
397
397
398
+ /**
399
+ * @brief Computes sqare root estimation.
400
+ *
401
+ * Can be less precise when FastMath is disabled.
402
+ * sqrt(inf) = inf
403
+ * sqrt(-0) = -0
404
+ * sqrt(NaN) = NaN
405
+ */
406
+ static this_t sqrt (this_t number)
407
+ {
408
+ bool isZero = !(number.data & 0x7FFFFFFFFFFFFFFFull);
409
+ if (isZero)
410
+ return number;
411
+
412
+ static const uint64_t MaxFloat64AsUint64 = 0x7FEFFFFFFFFFFFFFull;
413
+ if (number.data > MaxFloat64AsUint64)
414
+ {
415
+ bool isInf = cpp_compat_intrinsics_impl::isinf_uint_impl (number.data);
416
+ if (isInf)
417
+ return number;
418
+
419
+ // when (number.data > MaxFloat64AsUint64) and is not infinity, we can be sure that number is either NaN or negative
420
+ return bit_cast<this_t>(ieee754::traits<this_t>::quietNaN);
421
+ }
422
+
423
+ const float f32InverseSquareRoot = nbl::hlsl::rsqrt (_static_cast<float >(number));
424
+
425
+ // find sqrt approximation using the Newton-Raphson method
426
+ this_t inverseSquareRoot = _static_cast<this_t>(f32InverseSquareRoot);
427
+ const int Iterations = 5 ;
428
+ static const this_t Half = this_t::create (0.5f );
429
+ static const this_t ThreeHalfs = this_t::create (1.5f );
430
+ const this_t x2 = number * Half;
431
+ [[unroll]]
432
+ for (int i = 0 ; i < Iterations; ++i)
433
+ {
434
+ inverseSquareRoot = inverseSquareRoot * (ThreeHalfs - (x2 * inverseSquareRoot * inverseSquareRoot));
435
+ }
436
+
437
+ if (FastMath)
438
+ {
439
+ return this_t::create (1.0f ) / inverseSquareRoot;
440
+ }
441
+ else
442
+ {
443
+ // 2 Newton-Raphson iterations to increase precision
444
+ this_t squareRoot = this_t::create (1.0f ) / inverseSquareRoot;
445
+ squareRoot = Half * (squareRoot + number / squareRoot);
446
+ squareRoot = Half * (squareRoot + number / squareRoot);
447
+ return squareRoot;
448
+ }
449
+ }
450
+
398
451
NBL_CONSTEXPR_STATIC bool isFastMathSupported = FastMath;
399
452
};
400
453
0 commit comments