
Commit 230a3de

Author: devsh (committed)
Merge remote-tracking branch 'remotes/origin/mesh_loaders_kevin'
2 parents (4042823 + a6ef74c), commit 230a3de

File tree: 17 files changed (+1032 / -971 lines)

include/nbl/asset/ECommonEnums.h

Lines changed: 4 additions & 283 deletions
Large diffs are not rendered by default.

include/nbl/asset/IAccelerationStructure.h

Lines changed: 2 additions & 0 deletions
@@ -23,6 +23,8 @@ namespace nbl::asset
 class IAccelerationStructure : public virtual core::IReferenceCounted
 {
     public:
+        static constexpr inline size_t TransformDataMinAlignment = 16;
+
         // build flags, we don't expose flags that don't make sense for certain levels
         enum class BUILD_FLAGS : uint8_t
         {
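The addition above pins a 16-byte minimum alignment for acceleration-structure transform data. A minimal sketch of how calling code might round a byte offset up to that alignment; the helper name and the hard-coded 16 (mirroring IAccelerationStructure::TransformDataMinAlignment) are illustrative assumptions, not part of the commit:

#include <cstddef>

// Round a byte offset up to the next multiple of the transform-data alignment.
// A power-of-two alignment is assumed, which holds for 16.
constexpr std::size_t alignTransformOffset(std::size_t byteOffset, std::size_t minAlignment = 16)
{
    return (byteOffset + minAlignment - 1) & ~(minAlignment - 1);
}

static_assert(alignTransformOffset(0) == 0);
static_assert(alignTransformOffset(1) == 16);
static_assert(alignTransformOffset(16) == 16);
static_assert(alignTransformOffset(17) == 32);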

include/nbl/asset/IBuffer.h

Lines changed: 1 addition & 1 deletion
@@ -100,7 +100,7 @@ struct SBufferRange
     inline operator SBufferRange<const BufferType>&() {return *reinterpret_cast<SBufferRange<const BufferType>*>(this);}
     inline operator const SBufferRange<const BufferType>&() const {return *reinterpret_cast<const SBufferRange<const BufferType>*>(this);}

-    template<typename BT> requires std::is_same_v<std::remove_const_t<BT>,BufferType>
+    template<typename BT> requires (std::is_const_v<BT> && std::is_base_of_v<IBuffer,std::remove_const_t<BT>>)
     inline operator SBufferBinding<BT>() const { return {.offset=offset,.buffer=buffer}; }

     explicit inline operator bool() const {return isValid();}
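The old `requires` clause only allowed conversion to an SBufferBinding over the exact same BufferType; the new one accepts any const target type derived from IBuffer. A standalone illustration of what the new constraint admits and rejects, using toy stand-in types rather than Nabla's real classes:

#include <type_traits>

struct IBuffer {};               // stand-in for the engine's buffer base
struct ICPUBuffer : IBuffer {};  // stand-in for a concrete derived buffer type

// Mirrors the new constraint: the target type must be const and derive from IBuffer.
template<typename BT>
concept ConvertibleBindingTarget =
    std::is_const_v<BT> && std::is_base_of_v<IBuffer, std::remove_const_t<BT>>;

static_assert(ConvertibleBindingTarget<const IBuffer>);     // const base: now allowed
static_assert(ConvertibleBindingTarget<const ICPUBuffer>);  // const derived: allowed
static_assert(!ConvertibleBindingTarget<ICPUBuffer>);       // non-const target: rejected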

include/nbl/asset/IPolygonGeometry.h

Lines changed: 21 additions & 19 deletions
@@ -203,6 +203,26 @@ class IPolygonGeometry : public IIndexableGeometry<BufferType>, public IPolygonG
         // For User defined semantics
         inline const core::vector<SDataView>& getAuxAttributeViews() const {return m_auxAttributeViews;}

+        inline E_INDEX_TYPE getIndexType() const
+        {
+            auto indexType = EIT_UNKNOWN;
+            // disallowed index format
+            if (base_t::m_indexView)
+            {
+                switch (base_t::m_indexView.composed.format)
+                {
+                    case EF_R16_UINT:
+                        indexType = EIT_16BIT;
+                        break;
+                    case EF_R32_UINT: [[fallthrough]];
+                        indexType = EIT_32BIT;
+                        break;
+                    default:
+                        break;
+                }
+            }
+            return indexType;
+        }

         // Does not set the `transform` or `geometryFlags` fields, because it doesn't care about it.
         // Also won't set second set of vertex data, opacity mipmaps, etc.
@@ -212,30 +232,12 @@ class IPolygonGeometry : public IIndexableGeometry<BufferType>, public IPolygonG
             // must be a triangle list, but don't want to compare pointers
             if (m_indexing && m_indexing->knownTopology()==EPT_TRIANGLE_LIST)// && m_indexing->degree() == TriangleList()->degree() && m_indexing->rate() == TriangleList->rate())
             {
-                auto indexType = EIT_UNKNOWN;
-                // disallowed index format
-                if (base_t::m_indexView)
-                {
-                    switch (base_t::m_indexView.composed.format)
-                    {
-                        case EF_R16_UINT:
-                            indexType = EIT_16BIT;
-                            break;
-                        case EF_R32_UINT: [[fallthrough]];
-                            indexType = EIT_32BIT;
-                            break;
-                        default:
-                            break;
-                    }
-                    if (indexType==EIT_UNKNOWN)
-                        return retval;
-                }
                 retval.vertexData[0] = base_t::m_positionView.src;
                 retval.indexData = base_t::m_indexView.src;
                 retval.maxVertex = base_t::m_positionView.getElementCount() - 1;
                 retval.vertexStride = base_t::m_positionView.composed.getStride();
                 retval.vertexFormat = base_t::m_positionView.composed.format;
-                retval.indexType = indexType;
+                retval.indexType = getIndexType();
             }
             return retval;
         }
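The index-type deduction that used to sit inside the export helper shown in the second hunk is now a reusable getIndexType() accessor: it returns EIT_16BIT for an EF_R16_UINT index view, EIT_32BIT for EF_R32_UINT, and EIT_UNKNOWN otherwise. A hedged usage sketch; the `geometry` pointer is a hypothetical caller-side variable, not something defined in this commit:

// Assuming `geometry` points at an IPolygonGeometry instance:
const auto indexType = geometry->getIndexType();
if (indexType == EIT_UNKNOWN)
{
    // No index view, or an index format other than EF_R16_UINT / EF_R32_UINT;
    // treat the geometry as non-indexed or reject it before building/drawing.
}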

include/nbl/asset/utils/CDirQuantCacheBase.h

Lines changed: 91 additions & 53 deletions
@@ -43,25 +43,26 @@ class CDirQuantCacheBase

         Vector8u3() : x(0u),y(0u),z(0u) {}
         Vector8u3(const Vector8u3&) = default;
-        explicit Vector8u3(const core::vectorSIMDu32& val)
+        explicit Vector8u3(const hlsl::uint32_t4& val)
         {
             operator=(val);
         }

         Vector8u3& operator=(const Vector8u3&) = default;
-        Vector8u3& operator=(const core::vectorSIMDu32& val)
+        Vector8u3& operator=(const hlsl::uint32_t4& val)
         {
             x = val.x;
             y = val.y;
             z = val.z;
             return *this;
         }

-        inline core::vectorSIMDu32 getValue() const
+        hlsl::uint32_t4 getValue() const
         {
-            return core::vectorSIMDu32(x,y,z);
+            return { x, y, z, 0 };
         }

+
     private:
         uint8_t x;
         uint8_t y;
@@ -74,13 +75,13 @@ class CDirQuantCacheBase

         Vector8u4() : x(0u),y(0u),z(0u),w(0u) {}
         Vector8u4(const Vector8u4&) = default;
-        explicit Vector8u4(const core::vectorSIMDu32& val)
+        explicit Vector8u4(const hlsl::uint32_t4& val)
         {
             operator=(val);
         }

         Vector8u4& operator=(const Vector8u4&) = default;
-        Vector8u4& operator=(const core::vectorSIMDu32& val)
+        Vector8u4& operator=(const hlsl::uint32_t4& val)
         {
             x = val.x;
             y = val.y;
@@ -89,9 +90,9 @@ class CDirQuantCacheBase
             return *this;
         }

-        inline core::vectorSIMDu32 getValue() const
+        hlsl::uint32_t4 getValue() const
         {
-            return core::vectorSIMDu32(x,y,z,w);
+            return { x, y, z, w };
         }

     private:
@@ -108,16 +109,16 @@ class CDirQuantCacheBase

         Vector1010102() : storage(0u) {}
         Vector1010102(const Vector1010102&) = default;
-        explicit Vector1010102(const core::vectorSIMDu32& val)
+        explicit Vector1010102(const hlsl::uint32_t4& val)
         {
             operator=(val);
         }

         Vector1010102& operator=(const Vector1010102&) = default;
-        Vector1010102& operator=(const core::vectorSIMDu32& val)
+        Vector1010102& operator=(const hlsl::uint32_t4& val)
         {
-            constexpr auto storageBits = quantizationBits+1u;
-            storage = val.x|(val.y<<storageBits)|(val.z<<(storageBits*2u));
+            constexpr auto storageBits = quantizationBits + 1u;
+            storage = val.x | (val.y << storageBits) | (val.z << (storageBits * 2u));
             return *this;
         }

@@ -130,13 +131,13 @@ class CDirQuantCacheBase
             return storage==other.storage;
         }

-        inline core::vectorSIMDu32 getValue() const
+        hlsl::uint32_t4 getValue() const
         {
-            constexpr auto storageBits = quantizationBits+1u;
-            const core::vectorSIMDu32 mask((0x1u<<storageBits)-1u);
-            return core::vectorSIMDu32(storage,storage>>storageBits,storage>>(storageBits*2u))&mask;
+            constexpr auto storageBits = quantizationBits + 1u;
+            const auto mask = (0x1u << storageBits) - 1u;
+            return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0};
         }
-
+
     private:
         uint32_t storage;
     };
@@ -149,25 +150,25 @@ class CDirQuantCacheBase

         Vector16u3() : x(0u),y(0u),z(0u) {}
         Vector16u3(const Vector16u3&) = default;
-        explicit Vector16u3(const core::vectorSIMDu32& val)
+        explicit Vector16u3(const hlsl::uint32_t4& val)
         {
             operator=(val);
         }

         Vector16u3& operator=(const Vector16u3&) = default;
-        Vector16u3& operator=(const core::vectorSIMDu32& val)
+        Vector16u3& operator=(const hlsl::uint32_t4& val)
        {
             x = val.x;
             y = val.y;
             z = val.z;
             return *this;
         }

-        inline core::vectorSIMDu32 getValue() const
+        hlsl::uint32_t4 getValue() const
         {
-            return core::vectorSIMDu32(x,y,z);
+            return { x, y, z, 0 };
         }
-
+
     private:
         uint16_t x;
         uint16_t y;
@@ -180,13 +181,13 @@ class CDirQuantCacheBase

         Vector16u4() : x(0u),y(0u),z(0u),w(0u) {}
         Vector16u4(const Vector16u4&) = default;
-        explicit Vector16u4(const core::vectorSIMDu32& val)
+        explicit Vector16u4(const hlsl::uint32_t4& val)
         {
             operator=(val);
         }

         Vector16u4& operator=(const Vector16u4&) = default;
-        Vector16u4& operator=(const core::vectorSIMDu32& val)
+        Vector16u4& operator=(const hlsl::uint32_t4& val)
         {
             x = val.x;
             y = val.y;
@@ -195,11 +196,11 @@ class CDirQuantCacheBase
             return *this;
         }

-        inline core::vectorSIMDu32 getValue() const
+        hlsl::float32_t4 getValue() const
         {
-            return core::vectorSIMDu32(x,y,z,w);
+            return { x, y, z, w };
         }
-
+
     private:
         uint16_t x;
         uint16_t y;
@@ -377,11 +378,30 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
         std::tuple<cache_type_t<Formats>...> cache;

         template<uint32_t dimensions, E_FORMAT CacheFormat>
-        value_type_t<CacheFormat> quantize(const core::vectorSIMDf& value)
+        value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)
         {
-            const auto negativeMask = value < core::vectorSIMDf(0.0f);
+            using float32_tN = hlsl::vector<hlsl::float32_t, dimensions>;
+
+            auto to_vec_t4 = []<typename T>(hlsl::vector<T, dimensions> src, T padValue) -> hlsl::vector<T, 4>
+            {
+                if constexpr(dimensions == 1)
+                {
+                    return {src.x, padValue, padValue, padValue};
+                } else if constexpr (dimensions == 2)
+                {
+                    return {src.x, src.y, padValue, padValue};
+                } else if constexpr (dimensions == 3)
+                {
+                    return {src.x, src.y, src.z, padValue};
+                } else if constexpr (dimensions == 4)
+                {
+                    return {src.x, src.y, src.z, src.w};
+                }
+            };
+
+            const auto negativeMask = to_vec_t4(lessThan(value, float32_tN(0.0f)), false);

-            const core::vectorSIMDf absValue = abs(value);
+            const float32_tN absValue = abs(value);
             const auto key = Key(absValue);

             constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -393,32 +413,50 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
                     quantized = found->second;
                 else
                 {
-                    const core::vectorSIMDf fit = findBestFit<dimensions,quantizationBits>(absValue);
+                    const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
+
+                    const auto abs_fit = to_vec_t4(abs(fit), 0.f);
+                    quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);

-                    quantized = core::vectorSIMDu32(core::abs(fit));
                     insertIntoCache<CacheFormat>(key,quantized);
                 }
             }

-            const core::vectorSIMDu32 xorflag((0x1u<<(quantizationBits+1u))-1u);
-            auto restoredAsVec = quantized.getValue()^core::mix(core::vectorSIMDu32(0u),xorflag,negativeMask);
-            restoredAsVec += core::mix(core::vectorSIMDu32(0u),core::vectorSIMDu32(1u),negativeMask);
-            return value_type_t<CacheFormat>(restoredAsVec&xorflag);
+            auto select = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
+            {
+                hlsl::uint32_t4 retval;
+                retval.x = mask.x ? val2.x : val1.x;
+                retval.y = mask.y ? val2.y : val1.y;
+                retval.z = mask.z ? val2.z : val1.z;
+                retval.w = mask.w ? val2.w : val1.w;
+                return retval;
+            };
+            ;
+            // create all one bits
+            const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
+
+            // for positive number xoring with 0 keep its value
+            // for negative number we xor with all one which will flip the bits, then we add one later. Flipping the bits then adding one will turn positive number into negative number
+            auto restoredAsVec = quantized.getValue() ^ select(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
+            restoredAsVec += hlsl::uint32_t4(negativeMask);
+
+            return value_type_t<CacheFormat>(restoredAsVec);
         }

         template<uint32_t dimensions, uint32_t quantizationBits>
-        static inline core::vectorSIMDf findBestFit(const core::vectorSIMDf& value)
+        static inline hlsl::vector<hlsl::float32_t, dimensions> findBestFit(const hlsl::vector<hlsl::float32_t, dimensions>& value)
         {
+            using float32_tN = hlsl::vector<hlsl::float32_t, dimensions>;
             static_assert(dimensions>1u,"No point");
             static_assert(dimensions<=4u,"High Dimensions are Hard!");
-            // precise normalize
-            const auto vectorForDots = value.preciseDivision(length(value));
+
+            const auto vectorForDots = hlsl::normalize(value);

             //
-            core::vectorSIMDf fittingVector;
-            core::vectorSIMDf floorOffset;
+            float32_tN fittingVector;
+            float32_tN floorOffset = {};
             constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
-            core::vectorSIMDf corners[cornerCount] = {};
+            float32_tN corners[cornerCount] = {};
             {
                 uint32_t maxDirCompIndex = 0u;
                 for (auto i=1u; i<dimensions; i++)
@@ -430,9 +468,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
                 if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))
                 {
                     _NBL_DEBUG_BREAK_IF(true);
-                    return core::vectorSIMDf(0.f);
+                    return float32_tN(0.f);
                 }
-                fittingVector = value.preciseDivision(core::vectorSIMDf(maxDirectionComp));
+                fittingVector = value / maxDirectionComp;
                 floorOffset[maxDirCompIndex] = 0.499f;
                 const uint32_t localCorner[7][3] = {
                     {1,0,0},
@@ -452,12 +490,12 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
                 }
             }

-            core::vectorSIMDf bestFit;
+            float32_tN bestFit;
             float closestTo1 = -1.f;
-            auto evaluateFit = [&](const core::vectorSIMDf& newFit) -> void
+            auto evaluateFit = [&](const float32_tN& newFit) -> void
             {
-                auto newFitLen = core::length(newFit);
-                const float dp = core::dot<core::vectorSIMDf>(newFit,vectorForDots).preciseDivision(newFitLen)[0];
+                auto newFitLen = length(newFit);
+                const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);
                 if (dp > closestTo1)
                 {
                     closestTo1 = dp;
@@ -466,18 +504,18 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
             };

             constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
-            const core::vectorSIMDf cubeHalfSizeND = core::vectorSIMDf(cubeHalfSize);
+            const float32_tN cubeHalfSizeND = hlsl::promote<float32_tN>(cubeHalfSize);
             for (uint32_t n=cubeHalfSize; n>0u; n--)
             {
                 //we'd use float addition in the interest of speed, to increment the loop
                 //but adding a small number to a large one loses precision, so multiplication preferrable
-                core::vectorSIMDf bottomFit = core::floor(fittingVector*float(n)+floorOffset);
-                if ((bottomFit<=cubeHalfSizeND).all())
+                const auto bottomFit = glm::floor(fittingVector * float(n) + floorOffset);
+                if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND)))
                     evaluateFit(bottomFit);
-                for (auto i=0u; i<cornerCount; i++)
+                for (auto i = 0u; i < cornerCount; i++)
                 {
                     auto bottomFitTmp = bottomFit+corners[i];
-                    if ((bottomFitTmp<=cubeHalfSizeND).all())
+                    if (hlsl::all(glm::lessThanEqual(bottomFitTmp, cubeHalfSizeND)))
                         evaluateFit(bottomFitTmp);
                 }
             }
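The rewritten quantize() keeps the original sign handling: magnitudes are cached unsigned, and negative components are restored by XOR-ing with an all-ones mask spanning quantizationBits+1 bits and then adding one, which is two's-complement negation confined to that bit field. A small standalone check of the identity for a hypothetical 10-bit format (the values below are arbitrary, not taken from the commit):

#include <cassert>
#include <cstdint>

int main()
{
    constexpr uint32_t quantizationBits = 10u;                            // e.g. a 10:10:10:2 cache format
    constexpr uint32_t xorflag = (0x1u << (quantizationBits + 1u)) - 1u;  // all ones over sign+magnitude bits

    const uint32_t magnitude = 37u;                                       // some quantized |component|
    // flip the bits within the field, then add one: two's-complement negation
    const uint32_t storedNegative = ((magnitude ^ xorflag) + 1u) & xorflag;

    // the field now holds 2^(quantizationBits+1) - magnitude, i.e. -magnitude modulo the field size
    assert(storedNegative == (0x1u << (quantizationBits + 1u)) - magnitude);
    return 0;
}

The loop comment in findBestFit about preferring fittingVector*float(n) over incrementally adding a step can also be sanity-checked with a tiny standalone program: repeated float addition accumulates rounding error, while a single multiplication rounds only once (the printed numbers below are what a typical IEEE-754 float run produces, quoted approximately):

#include <cstdio>

int main()
{
    const float step = 0.1f;
    float accumulated = 0.f;
    for (int n = 1; n <= 1000000; n++)
        accumulated += step;                    // error compounds every iteration
    const float multiplied = step * 1000000.f;  // only one rounding step
    std::printf("accumulated=%f multiplied=%f\n", accumulated, multiplied);
    // prints roughly: accumulated=100958.343750 multiplied=100000.000000
    return 0;
}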
