Skip to content

Commit 4042823

Browse files
authored
Merge pull request #909 from Devsh-Graphics-Programming/ef64_length
Added length function for emulated vectors
2 parents 1dfd3ab + 4075ab6 commit 4042823

File tree

3 files changed

+67
-1
lines changed

3 files changed

+67
-1
lines changed

include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,19 @@ struct bitReverseAs_helper<T NBL_PARTIAL_REQ_BOT(concepts::UnsignedIntegralScala
631631
}
632632
};
633633

634+
#define VECTORIAL_SPECIALIZATION_CONCEPT concepts::Vectorial<T> && !is_vector_v<T>
635+
template<typename T>
636+
NBL_PARTIAL_REQ_TOP(VECTORIAL_SPECIALIZATION_CONCEPT)
637+
struct length_helper<T NBL_PARTIAL_REQ_BOT(VECTORIAL_SPECIALIZATION_CONCEPT) >
638+
{
639+
using scalar_t = typename vector_traits<T>::scalar_type;
640+
static inline scalar_t __call(NBL_CONST_REF_ARG(T) vec)
641+
{
642+
return scalar_t::sqrt(dot_helper<T>::__call(vec, vec));
643+
}
644+
};
645+
#undef VECTORIAL_SPECIALIZATION_CONCEPT
646+
634647
#ifdef __HLSL_VERSION
635648
// SPIR-V already defines specializations for builtin vector types
636649
#define VECTOR_SPECIALIZATION_CONCEPT concepts::Vectorial<T> && !is_vector_v<T>

include/nbl/builtin/hlsl/emulated/float64_t.hlsl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,59 @@ namespace hlsl
395395
return bit_cast<this_t>(data ^ ieee754::traits<float64_t>::signMask);
396396
}
397397

398+
/**
399+
* @brief Computes sqare root estimation.
400+
*
401+
* Can be less precise when FastMath is disabled.
402+
* sqrt(inf) = inf
403+
* sqrt(-0) = -0
404+
* sqrt(NaN) = NaN
405+
*/
406+
static this_t sqrt(this_t number)
407+
{
408+
bool isZero = !(number.data & 0x7FFFFFFFFFFFFFFFull);
409+
if (isZero)
410+
return number;
411+
412+
static const uint64_t MaxFloat64AsUint64 = 0x7FEFFFFFFFFFFFFFull;
413+
if (number.data > MaxFloat64AsUint64)
414+
{
415+
bool isInf = cpp_compat_intrinsics_impl::isinf_uint_impl(number.data);
416+
if (isInf)
417+
return number;
418+
419+
// when (number.data > MaxFloat64AsUint64) and is not infinity, we can be sure that number is either NaN or negative
420+
return bit_cast<this_t>(ieee754::traits<this_t>::quietNaN);
421+
}
422+
423+
const float f32InverseSquareRoot = nbl::hlsl::rsqrt(_static_cast<float>(number));
424+
425+
// find sqrt approximation using the Newton-Raphson method
426+
this_t inverseSquareRoot = _static_cast<this_t>(f32InverseSquareRoot);
427+
const int Iterations = 5;
428+
static const this_t Half = this_t::create(0.5f);
429+
static const this_t ThreeHalfs = this_t::create(1.5f);
430+
const this_t x2 = number * Half;
431+
[[unroll]]
432+
for (int i = 0; i < Iterations; ++i)
433+
{
434+
inverseSquareRoot = inverseSquareRoot * (ThreeHalfs - (x2 * inverseSquareRoot * inverseSquareRoot));
435+
}
436+
437+
if (FastMath)
438+
{
439+
return this_t::create(1.0f) / inverseSquareRoot;
440+
}
441+
else
442+
{
443+
// 2 Newton-Raphson iterations to increase precision
444+
this_t squareRoot = this_t::create(1.0f) / inverseSquareRoot;
445+
squareRoot = Half * (squareRoot + number / squareRoot);
446+
squareRoot = Half * (squareRoot + number / squareRoot);
447+
return squareRoot;
448+
}
449+
}
450+
398451
NBL_CONSTEXPR_STATIC bool isFastMathSupported = FastMath;
399452
};
400453

0 commit comments

Comments
 (0)