From 4a37ba53b067bba3f04333423299fd4a34080086 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Wed, 25 Jun 2025 18:29:12 +0700
Subject: [PATCH 01/40] Implement createCylinder, createCone, createSphere,
 createArrow

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 1010 +++++++++++++++-------
 1 file changed, 684 insertions(+), 326 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index c5c6ac6765..c25a222a53 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -15,6 +15,11 @@
 namespace nbl::asset
 {
 
+static uint8_t packSnorm(float val) // clamps val to [-1,1] and returns the raw byte of its 8-bit SNORM (x127) encoding
+{
+	return static_cast<uint8_t>(static_cast<int8_t>(std::round(hlsl::clamp(val, -1.0f, 1.0f) * 127.0f))); // via int8_t: converting a negative float directly to uint8_t is undefined behaviour
+}
+
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const hlsl::float32_t3 size) const
 {
 	using namespace hlsl;
@@ -156,7 +161,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	{
 		const hlsl::vector<int8_t, 3> norm[6] =
 		{
-			hlsl::vector<int8_t,3>(0, 0, 127),
+			hlsl::vector<int8_t,3>(0, 0, 127),
 			hlsl::vector<int8_t,3>(127, 0, 0),
 			hlsl::vector<int8_t,3>(0, 0,-127),
 			hlsl::vector<int8_t,3>(-127, 0, 0),
@@ -186,121 +191,12 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	return retval;
 }
 
-#if 0
-
-/*
-	a cylinder, a cone and a cross
-	point up on (0,1.f, 0.f )
-*/
-core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
-	const uint32_t tesselationCylinder,
-	const uint32_t tesselationCone,
-	const float height,
-	const float cylinderHeight,
-	const float width0,
-	const float width1,
-	const video::SColor vtxColor0,
-	const video::SColor vtxColor1
-) const
+core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float radius,
+				uint32_t polyCountX, uint32_t polyCountY, CQuantNormalCache* const quantNormalCacheOverride) const
 {
-    assert(height > cylinderHeight);
-
-    auto cylinder = createCylinderMesh(width0, cylinderHeight, tesselationCylinder, vtxColor0);
-    auto cone = createConeMesh(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1);
-
-	auto cylinderVertices = reinterpret_cast<CylinderVertex*>(cylinder.bindings[0].buffer->getPointer());
-	auto coneVertices = reinterpret_cast<ConeVertex*>(cone.bindings[0].buffer->getPointer());
-
-	auto cylinderIndecies = reinterpret_cast<uint16_t*>(cylinder.indexBuffer.buffer->getPointer());
-	auto coneIndecies = reinterpret_cast<uint16_t*>(cone.indexBuffer.buffer->getPointer());
-
-	const auto cylinderVertexCount = cylinder.bindings[0].buffer->getSize() / sizeof(CylinderVertex);
-	const auto coneVertexCount = cone.bindings[0].buffer->getSize() / sizeof(ConeVertex);
-	const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount;
-
-	const auto cylinderIndexCount = cylinder.indexBuffer.buffer->getSize() / sizeof(uint16_t);
-	const auto coneIndexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t);
-	const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount;
-
-	for (auto i = 0ull; i < coneVertexCount; ++i)
-	{
-		core::vector3df_SIMD newPos = coneVertices[i].pos;
-		newPos.rotateYZByRAD(-1.5707963268);
-
-		for (auto c = 0; c < 3; ++c)
-			coneVertices[i].pos[c] = newPos[c];
-	}
-
-	auto newArrowVertexBuffer = asset::ICPUBuffer::create({ newArrowVertexCount * sizeof(ArrowVertex) });
-	newArrowVertexBuffer->setUsageFlags(newArrowVertexBuffer->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	auto newArrowIndexBuffer = asset::ICPUBuffer::create({ newArrowIndexCount * sizeof(uint16_t) });
-	newArrowIndexBuffer->setUsageFlags(newArrowIndexBuffer->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-
-	for (auto z = 0ull; z < newArrowVertexCount; ++z)
-	{
-		auto arrowVertex = reinterpret_cast<ArrowVertex*>(newArrowVertexBuffer->getPointer()) + z;
-
-		if (z < cylinderVertexCount)
-		{
-			auto cylinderVertex = (cylinderVertices + z);
-			memcpy(arrowVertex, cylinderVertex, sizeof(ArrowVertex));
-		}
-		else
-		{
-			auto coneVertex = (coneVertices + z - cylinderVertexCount);
-			memcpy(arrowVertex, coneVertex, offsetof(ConeVertex, normal)); // copy position and color
-			arrowVertex->uv[0] = 0;
-			arrowVertex->uv[1] = 0;
-			arrowVertex->normal = coneVertex->normal;
-		}
-	}
-
-	{
-		auto ArrowIndices = reinterpret_cast<uint16_t*>(newArrowIndexBuffer->getPointer());
-		auto newConeIndices = (ArrowIndices + cylinderIndexCount);
-
-		memcpy(ArrowIndices, cylinderIndecies, sizeof(uint16_t) * cylinderIndexCount);
-		memcpy(newConeIndices, coneIndecies, sizeof(uint16_t) * coneIndexCount);
-
-		for (auto i = 0ull; i < coneIndexCount; ++i)
-			*(newConeIndices + i) += cylinderVertexCount;
-	}
-
-	return_type arrow;
-
-	constexpr size_t vertexSize = sizeof(ArrowVertex);
-	arrow.inputParams = 
-	{ 0b1111u,0b1u,
-		{
-			{0u,EF_R32G32B32_SFLOAT,offsetof(ArrowVertex,pos)},
-			{0u,EF_R8G8B8A8_UNORM,offsetof(ArrowVertex,color)},
-			{0u,EF_R32G32_SFLOAT,offsetof(ArrowVertex,uv)},
-			{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ArrowVertex,normal)}
-		},
-		{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} 
-	};
-
-	arrow.bindings[0] = { 0, std::move(newArrowVertexBuffer) }; 
-	arrow.indexBuffer = { 0, std::move(newArrowIndexBuffer) };
-	arrow.indexCount = newArrowIndexCount;
-	arrow.indexType = EIT_16BIT;
-
-    return arrow;
-}
+	using namespace hlsl;
 
-/* A sphere with proper normals and texture coords */
-core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float radius, uint32_t polyCountX, uint32_t polyCountY, IMeshManipulator* const meshManipulatorOverride) const
-{
-	// we are creating the sphere mesh here.
-	return_type retval;
-	constexpr size_t vertexSize = sizeof(CGeometryCreator::SphereVertex);
-	CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache();
-	retval.inputParams = { 0b1111u,0b1u,{
-											{0u,EF_R32G32B32_SFLOAT,offsetof(SphereVertex,pos)},
-											{0u,EF_R8G8B8A8_UNORM,offsetof(SphereVertex,color)},
-											{0u,EF_R32G32_SFLOAT,offsetof(SphereVertex,uv)},
-											{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(SphereVertex,normal)}
-										},{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} };
+	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
 	if (polyCountX < 2)
 		polyCountX = 2;
@@ -308,15 +204,20 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		polyCountY = 2;
 
 	const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level
+  const size_t vertexCount = (polyCountXPitch * polyCountY) + 2;
 
-	retval.indexCount = (polyCountX * polyCountY) * 6;
-	auto indices = asset::ICPUBuffer::create({ sizeof(uint32_t) * retval.indexCount });
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
 	// Create indices
+	using index_t = uint32_t;
 	{
-		uint32_t level = 0;
+    const auto indexCount = (polyCountX * polyCountY) * 6;
+		const auto bytesize = sizeof(index_t) * indexCount;
+		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+		auto indexPtr = reinterpret_cast<index_t*>(indices->getPointer());
+    uint32_t level = 0;
 		size_t indexAddIx = 0;
-		uint32_t* indexPtr = (uint32_t*)indices->getPointer();
 		for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1)
 		{
 			//main quads, top to bottom
@@ -372,23 +273,123 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		indexPtr[indexAddIx++] = polyCountSqM1 + polyCountX - 1;
 		indexPtr[indexAddIx++] = polyCountSqM1;
 		indexPtr[indexAddIx++] = polyCountSq1;
+
+		shapes::AABB<4,index_t> aabb; // range of vertex indices the index buffer may reference
+		aabb.minVx[0] = 0;
+		aabb.maxVx[0] = vertexCount - 1;
+		retval->setIndexView({
+			.composed = {
+				.encodedDataRange = {.u32=aabb},
+				.stride = sizeof(index_t),
+				.format = EF_R32_UINT, // index_t is uint32_t in this function, so the view must be 32-bit
+				.rangeFormat = IGeometryBase::EAABBFormat::U32 // must agree with the .u32 member filled above
+			},
+			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
+		});
 	}
-	indices->setUsageFlags(indices->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	retval.indexBuffer = {0ull, std::move(indices)};
 
-	// handle vertices
+	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
+	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
+
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
+  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 2>* uvs;
+	hlsl::vector<uint8_t, 4>* colors;
+	{
+		{
+			constexpr auto AttrSize = sizeof(decltype(*positions));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, radius, 0.0f);
+			aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f);
+			retval->setPositionView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {
+				  .offset=0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+		{
+			constexpr auto AttrSize = sizeof(decltype(*normals));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
+			shapes::AABB<4, int8_t> aabb;
+			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.minVx = -aabb.maxVx;
+			retval->setNormalView({
+				.composed = {
+					.encodedDataRange = {.s8=aabb},
+					.stride = AttrSize,
+					.format = NormalFormat,
+					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff)
+				}
+			});
+		}
+		{
+			constexpr auto AttrSize = sizeof(decltype(*uvs));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
+			shapes::AABB<4, uint8_t> aabb;
+			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
+			retval->getAuxAttributeViews()->push_back({
+				.composed = {
+					.encodedDataRange = {.u8=aabb},
+					.stride = AttrSize,
+					.format = EF_R8G8_UNORM,
+					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+		{ // per-vertex RGBA8 colour buffer, registered as an auxiliary attribute view
+			constexpr auto AttrSize = sizeof(decltype(*colors));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
+			shapes::AABB<4, uint8_t> aabb;
+			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,255,255); // all four channels are used (vertices are filled with 255,255,255,255)
+			retval->getAuxAttributeViews()->push_back({
+				.composed = {
+					.encodedDataRange = {.u8=aabb},
+					.stride = AttrSize,
+					.format = EF_R8G8B8A8_UNORM,
+					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+	}
+
+	// fill vertices
 	{
-		size_t vertexSize = 3 * 4 + 4 + 2 * 4 + 4;
-		size_t vertexCount = (polyCountXPitch * polyCountY) + 2;
-		auto vtxBuf = asset::ICPUBuffer::create({ vertexCount * vertexSize });
-		auto* tmpMem = reinterpret_cast<uint8_t*>(vtxBuf->getPointer());
 		for (size_t i = 0; i < vertexCount; i++)
 		{
-			tmpMem[i * vertexSize + 3 * 4 + 0] = 255;
-			tmpMem[i * vertexSize + 3 * 4 + 1] = 255;
-			tmpMem[i * vertexSize + 3 * 4 + 2] = 255;
-			tmpMem[i * vertexSize + 3 * 4 + 3] = 255;
+			colors[i] = { 255,255,255,255 };
 		}
+
 		// calculate the angle which separates all points in a circle
 		const float AngleX = 2 * core::PI<float>() / polyCountX;
 		const float AngleY = core::PI<float>() / polyCountY;
@@ -398,258 +399,615 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		// we don't start at 0.
 
 		double ay = 0;//AngleY / 2;
-
-		using quant_normal_t = CQuantNormalCache::value_type_t<EF_A2B10G10R10_SNORM_PACK32>;
-		uint8_t* tmpMemPtr = tmpMem;
-		for (uint32_t y = 0; y < polyCountY; ++y)
-		{
-			ay += AngleY;
-			const double sinay = sin(ay);
-			axz = 0;
-
-			// calculate the necessary vertices without the doubled one
-			uint8_t* oldTmpMemPtr = tmpMemPtr;
-			for (uint32_t xz = 0; xz < polyCountX; ++xz)
-			{
-				// calculate points position
-
-				float32_t3 pos(static_cast<float>(cos(axz) * sinay),
-					static_cast<float>(cos(ay)),
-					static_cast<float>(sin(axz) * sinay));
-				// for spheres the normal is the position
-				core::vectorSIMDf normal(&pos.X);
-				normal.makeSafe3D();
-				quant_normal_t quantizedNormal = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(normal);
-				pos *= radius;
-
-				// calculate texture coordinates via sphere mapping
-				// tu is the same on each level, so only calculate once
-				float tu = 0.5f;
-				//if (y==0)
-				//{
-				if (normal.Y != -1.0f && normal.Y != 1.0f)
-					tu = static_cast<float>(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
-				if (normal.Z < 0.0f)
-					tu = 1 - tu;
-				//}
-				//else
-					//tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
-
-				((float*)tmpMemPtr)[0] = pos.X;
-				((float*)tmpMemPtr)[1] = pos.Y;
-				((float*)tmpMemPtr)[2] = pos.Z;
-				((float*)tmpMemPtr)[4] = tu;
-				((float*)tmpMemPtr)[5] = static_cast<float>(ay * core::RECIPROCAL_PI<double>());
-				((quant_normal_t*)tmpMemPtr)[6] = quantizedNormal;
-				static_assert(sizeof(quant_normal_t)==4u);
-
-				tmpMemPtr += vertexSize;
-				axz += AngleX;
-			}
-			// This is the doubled vertex on the initial position
-
-			((float*)tmpMemPtr)[0] = ((float*)oldTmpMemPtr)[0];
-			((float*)tmpMemPtr)[1] = ((float*)oldTmpMemPtr)[1];
-			((float*)tmpMemPtr)[2] = ((float*)oldTmpMemPtr)[2];
-			((float*)tmpMemPtr)[4] = 1.f;
-			((float*)tmpMemPtr)[5] = ((float*)oldTmpMemPtr)[5];
-			((uint32_t*)tmpMemPtr)[6] = ((uint32_t*)oldTmpMemPtr)[6];
-			tmpMemPtr += vertexSize;
-		}
+		auto vertex_i = 0;
+    for (uint32_t y = 0; y < polyCountY; ++y)
+    {
+      ay += AngleY;
+      const double sinay = sin(ay);
+      axz = 0;
+
+      // calculate the necessary vertices without the doubled one
+			const auto old_vertex_i = vertex_i;
+      for (uint32_t xz = 0; xz < polyCountX; ++xz)
+      {
+        // calculate points position
+
+        float32_t3 pos(static_cast<float>(cos(axz) * sinay),
+          static_cast<float>(cos(ay)),
+          static_cast<float>(sin(axz) * sinay));
+        // for spheres the normal is the position
+        core::vectorSIMDf normal(&pos.x);
+        normal.makeSafe3D();
+        const auto quantizedNormal = quantNormalCache->quantize<NormalCacheFormat>(normal);
+        pos *= radius;
+
+        // calculate texture coordinates via sphere mapping
+        // tu is the same on each level, so only calculate once
+        float tu = 0.5f;
+        //if (y==0)
+        //{
+        if (normal.Y != -1.0f && normal.Y != 1.0f)
+          tu = static_cast<float>(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
+        if (normal.Z < 0.0f)
+          tu = 1 - tu;
+        //}
+        //else
+          //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
+
+				positions[vertex_i] = pos;
+				uvs[vertex_i] = { static_cast<uint8_t>(std::round(tu * 255.0f)), static_cast<uint8_t>(std::round(hlsl::clamp(static_cast<float>(ay * core::RECIPROCAL_PI<double>()), 0.0f, 1.0f) * 255.0f)) }; // UNORM8: [0,1] -> [0,255], matching the EF_R8G8_UNORM view
+				memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal));
+
+				vertex_i++;
+        axz += AngleX;
+      }
+      // This is the doubled vertex on the initial position
+
+      positions[vertex_i] = positions[old_vertex_i];
+			uvs[vertex_i] = { 255, uvs[old_vertex_i].y }; // u = 1.0 on the seam (UNORM8)
+			normals[vertex_i] = normals[old_vertex_i];
+
+			vertex_i++;
+    }
 
 		// the vertex at the top of the sphere
-		((float*)tmpMemPtr)[0] = 0.f;
-		((float*)tmpMemPtr)[1] = radius;
-		((float*)tmpMemPtr)[2] = 0.f;
-		((float*)tmpMemPtr)[4] = 0.5f;
-		((float*)tmpMemPtr)[5] = 0.f;
-		((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::vectorSIMDf(0.f, 1.f, 0.f));
+		positions[vertex_i] = { 0.f, radius, 0.f };
+		uvs[vertex_i] = { 128, 0 }; // (u,v) = (0.5, 0) as in the code being replaced; 0.5 * 255 rounds to 128
+		const auto quantizedTopNormal = quantNormalCache->quantize<NormalCacheFormat>(core::vectorSIMDf(0.f, 1.f, 0.f));
+    memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal));
 
 		// the vertex at the bottom of the sphere
-		tmpMemPtr += vertexSize;
-		((float*)tmpMemPtr)[0] = 0.f;
-		((float*)tmpMemPtr)[1] = -radius;
-		((float*)tmpMemPtr)[2] = 0.f;
-		((float*)tmpMemPtr)[4] = 0.5f;
-		((float*)tmpMemPtr)[5] = 1.f;
-		((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::vectorSIMDf(0.f, -1.f, 0.f));
-
-		// recalculate bounding box
-		core::aabbox3df BoundingBox;
-		BoundingBox.reset(float32_t3(radius));
-		BoundingBox.addInternalPoint(-radius, -radius, -radius);
-
-		// set vertex buffer
-		vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-		retval.bindings[0] = { 0ull,std::move(vtxBuf) };
-		retval.indexType = asset::EIT_32BIT;
-		retval.bbox = BoundingBox;
+		vertex_i++;
+		positions[vertex_i] = { 0.f, -radius, 0.f };
+		uvs[vertex_i] = { 128, 255 }; // (u,v) = (0.5, 1) as in the code being replaced
+		const auto quantizedBottomNormal = quantNormalCache->quantize<NormalCacheFormat>(core::vectorSIMDf(0.f, -1.f, 0.f));
+    memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal));
 	}
 
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
-/* A cylinder with proper normals and texture coords */
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	float radius, float length,
-	uint32_t tesselation, const video::SColor& color, IMeshManipulator* const meshManipulatorOverride
-) const
+	uint32_t tesselation, const video::SColor& color, CQuantNormalCache* const quantNormalCacheOverride) const
 {
-	return_type retval;
-	constexpr size_t vertexSize = sizeof(CGeometryCreator::CylinderVertex);
-	CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache();
-	retval.inputParams = { 0b1111u,0b1u,{
-											{0u,EF_R32G32B32_SFLOAT,offsetof(CylinderVertex,pos)},
-											{0u,EF_R8G8B8A8_UNORM,offsetof(CylinderVertex,color)},
-											{0u,EF_R32G32_SFLOAT,offsetof(CylinderVertex,uv)},
-											{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(CylinderVertex,normal)}
-										},{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} };
-
-    const size_t vtxCnt = 2u*tesselation;
-    auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt*sizeof(CylinderVertex) });
-
-    CylinderVertex* vertices = reinterpret_cast<CylinderVertex*>(vtxBuf->getPointer());
-	for (auto i=0ull; i<vtxCnt; i++)
-		vertices[i] = CylinderVertex();
-
-    const uint32_t halfIx = tesselation;
-
-    uint8_t glcolor[4];
-    color.toOpenGLColor(glcolor);
-
-    const float tesselationRec = core::reciprocal_approxim<float>(tesselation);
-    const float step = 2.f*core::PI<float>()*tesselationRec;
-    for (uint32_t i = 0u; i<tesselation; ++i)
-    {
-        core::vectorSIMDf p(std::cos(i*step), std::sin(i*step), 0.f);
-        p *= radius;
-        const auto n = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::normalize(p));
-
-        memcpy(vertices[i].pos, p.pointer, 12u);
-        vertices[i].normal = n;
-        memcpy(vertices[i].color, glcolor, 4u);
-        vertices[i].uv[0] = float(i) * tesselationRec;
-
-        vertices[i+halfIx] = vertices[i];
-        vertices[i+halfIx].pos[2] = length;
-        vertices[i+halfIx].uv[1] = 1.f;
-    }
+	using namespace hlsl;
+
+	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
+
+  const uint16_t halfIx = static_cast<uint16_t>(tesselation);
+  const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
 
-    constexpr uint32_t rows = 2u;
-	retval.indexCount = rows * 3u * tesselation;
-    auto idxBuf = asset::ICPUBuffer::create({ retval.indexCount *sizeof(uint16_t) });
-    uint16_t* indices = (uint16_t*)idxBuf->getPointer();
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
-    for (uint32_t i = 0u, j = 0u; i < halfIx; ++i)
+	// Create indices
+	using index_t = uint16_t;
+	{
+    constexpr uint32_t RowCount = 2u;
+    const auto IndexCount = RowCount * 3 * tesselation;
+		const auto bytesize = sizeof(index_t) * IndexCount;
+		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+		auto u = reinterpret_cast<index_t*>(indices->getPointer());
+    for (uint32_t i = 0u, j = 0u; i < halfIx; ++i) // 32-bit counters: j reaches 6*tesselation and would wrap a uint16_t once tesselation > 10922
     {
-        indices[j++] = i;
-        indices[j++] = (i+1u)!=halfIx ? (i+1u):0u;
-        indices[j++] = i+halfIx;
-        indices[j++] = i+halfIx;
-        indices[j++] = (i+1u)!=halfIx ? (i+1u):0u;
-        indices[j++] = (i+1u)!=halfIx ? (i+1u+halfIx):halfIx;
+      u[j++] = static_cast<index_t>(i); // first triangle of the side quad: bottom i, bottom next, top i
+      u[j++] = static_cast<index_t>((i + 1u) != halfIx ? (i + 1u):0u); // next bottom vertex, wrapping to 0 on the last segment
+      u[j++] = static_cast<index_t>(i + halfIx);
+      u[j++] = static_cast<index_t>(i + halfIx); // second triangle: top i, bottom next, top next
+      u[j++] = static_cast<index_t>((i + 1u)!= halfIx ? (i + 1u):0u);
+      u[j++] = static_cast<index_t>((i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx); // next top vertex, wrapping to the first top vertex
     }
 
-	// set vertex buffer
-	idxBuf->setUsageFlags(idxBuf->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	retval.indexBuffer = { 0ull, std::move(idxBuf) };
-	vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	retval.bindings[0] = { 0ull, std::move(vtxBuf) };
-	retval.indexType = asset::EIT_16BIT;
-	//retval.bbox = ?;
+		shapes::AABB<4,index_t> aabb;
+		aabb.minVx[0] = 0;
+		aabb.maxVx[0] = vertexCount - 1;
+		retval->setIndexView({
+			.composed = {
+				.encodedDataRange = {.u16=aabb},
+				.stride = sizeof(index_t),
+				.format = EF_R16_UINT,
+				.rangeFormat = IGeometryBase::EAABBFormat::U16
+			},
+			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
+		});
+	}
+
+	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
+	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
+
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
+  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 2>* uvs;
+	hlsl::vector<uint8_t, 4>* colors;
+	{
+		{
+			constexpr auto AttrSize = sizeof(decltype(*positions));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
+			aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
+			retval->setPositionView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {
+				  .offset=0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+		{
+			constexpr auto AttrSize = sizeof(decltype(*normals));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
+			shapes::AABB<4, int8_t> aabb;
+			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.minVx = -aabb.maxVx;
+			retval->setNormalView({
+				.composed = {
+					.encodedDataRange = {.s8=aabb},
+					.stride = AttrSize,
+					.format = NormalFormat,
+					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff)
+				}
+			});
+		}
+		{
+			constexpr auto AttrSize = sizeof(decltype(*uvs));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
+			shapes::AABB<4, uint8_t> aabb;
+			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
+			retval->getAuxAttributeViews()->push_back({
+				.composed = {
+					.encodedDataRange = {.u8=aabb},
+					.stride = AttrSize,
+					.format = EF_R8G8_UNORM,
+					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+		{ // per-vertex RGBA8 colour buffer, registered as an auxiliary attribute view
+			constexpr auto AttrSize = sizeof(decltype(*colors));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
+			shapes::AABB<4, uint8_t> aabb;
+			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,255,255); // caller-supplied colour may use the full range in all four channels
+			retval->getAuxAttributeViews()->push_back({
+				.composed = {
+					.encodedDataRange = {.u8=aabb},
+					.stride = AttrSize,
+					.format = EF_R8G8B8A8_UNORM,
+					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+	}
+
+  uint8_t glcolor[4];
+  color.toOpenGLColor(glcolor);
+
+  const float tesselationRec = core::reciprocal_approxim<float>(static_cast<float>(tesselation));
+  const float step = 2.f * core::PI<float>() * tesselationRec;
+  for (uint32_t i = 0u; i < tesselation; ++i) // one bottom-ring vertex (z = 0) and one top-ring vertex (z = length) per segment
+  {
+		const auto f_i = static_cast<float>(i);
+    core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
+    p *= radius;
+    const auto n = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(p));
+
+    positions[i] = { p.x, p.y, p.z };
+		memcpy(normals + i, &n, sizeof(n));
+		uvs[i] = { static_cast<uint8_t>(std::round(hlsl::clamp(f_i * tesselationRec, 0.0f, 1.0f) * 255.0f)), 0 }; // UNORM8 (u = i/tesselation, v = 0); assigning the raw float would truncate u to 0
+		colors[i] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] };
+
+    positions[i + halfIx] = { p.x, p.y, length };
+    normals[i + halfIx] = normals[i];
+    uvs[i + halfIx] = { uvs[i].x, 255 }; // top ring keeps u and sets v = 1.0, as in the code being replaced
+		colors[i + halfIx] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] };
+  }
 
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
-/* A cone with proper normals and texture coords */
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
-	float radius, float length, uint32_t tesselation,
-	const video::SColor& colorTop,
-	const video::SColor& colorBottom,
-	float oblique,
-	IMeshManipulator* const meshManipulatorOverride
-) const
+  float radius, float length, uint32_t tesselation,
+  const video::SColor& colorTop,
+  const video::SColor& colorBottom,
+  float oblique, CQuantNormalCache* const quantNormalCacheOverride) const
 {
-    const size_t vtxCnt = tesselation * 2;
-    auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt * sizeof(ConeVertex) });
-    ConeVertex* vertices = reinterpret_cast<ConeVertex*>(vtxBuf->getPointer());
 
-	ConeVertex* baseVertices = vertices;
-	ConeVertex* apexVertices = vertices + tesselation;
+	using namespace hlsl;
 
-    std::fill(vertices,vertices+vtxCnt, ConeVertex(core::vectorSIMDf(0.f),{},colorBottom));
-	CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache();
+	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
-    const float step = (2.f*core::PI<float>()) / tesselation;
+  const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
+
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
+
+	// Create indices
+	using index_t = uint16_t;
+	{
+    constexpr uint32_t RowCount = 2u;
+    const auto IndexCount = 3 * tesselation;
+		const auto bytesize = sizeof(index_t) * IndexCount;
+		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+		auto u = reinterpret_cast<index_t*>(indices->getPointer());
+    const uint32_t firstIndexOfBaseVertices = 0;
+    const uint32_t firstIndexOfApexVertices = tesselation;
+    for (uint32_t i = 0; i < tesselation; i++)
+    {
+      u[i * 3] = firstIndexOfApexVertices + i;
+      u[(i * 3) + 1] = firstIndexOfBaseVertices + i;
+      u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
+    }
+
+		shapes::AABB<4,index_t> aabb;
+		aabb.minVx[0] = 0;
+		aabb.maxVx[0] = vertexCount - 1;
+		retval->setIndexView({
+			.composed = {
+				.encodedDataRange = {.u16=aabb},
+				.stride = sizeof(index_t),
+				.format = EF_R16_UINT,
+				.rangeFormat = IGeometryBase::EAABBFormat::U16
+			},
+			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
+		});
+	}
+
+	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
+	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
+
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
+  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 4>* colors;
+  {
+    {
+      constexpr auto AttrSize = sizeof(decltype(*positions));
+      auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+      positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
+      shapes::AABB<4, float32_t> aabb;
+      aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
+      aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
+      retval->setPositionView({
+        .composed = {
+          .encodedDataRange = {.f32 = aabb},
+          .stride = AttrSize,
+          .format = EF_R32G32B32_SFLOAT,
+          .rangeFormat = IGeometryBase::EAABBFormat::F32
+        },
+        .src = {
+          .offset=0,
+          .size = buff->getSize(),
+          .buffer = std::move(buff),
+        }
+      });
+    }
+    {
+      constexpr auto AttrSize = sizeof(decltype(*normals));
+      auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+      normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
+      shapes::AABB<4, int8_t> aabb;
+      aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+      aabb.minVx = -aabb.maxVx;
+      retval->setNormalView({
+        .composed = {
+          .encodedDataRange = {.s8=aabb},
+          .stride = AttrSize,
+          .format = NormalFormat,
+          .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+        },
+        .src = {
+          .offset = 0,
+          .size = buff->getSize(),
+          .buffer = std::move(buff)
+        }
+      });
+    }
+    {
+      constexpr auto AttrSize = sizeof(decltype(*colors));
+      auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+      colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
+      shapes::AABB<4, uint8_t> aabb;
+      aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+      aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
+      retval->getAuxAttributeViews()->push_back({
+        .composed = {
+          .encodedDataRange = {.u8=aabb},
+          .stride = AttrSize,
+          .format = EF_R8G8B8A8_UNORM,
+          .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+        },
+        .src = {
+          .offset = 0,
+          .size = buff->getSize(),
+          .buffer = std::move(buff),
+        }
+      });
+    }
+  }
+
+  uint8_t glcolor[4];
+  colorBottom.toOpenGLColor(glcolor);
+	vector<uint8_t, 4> vertexBottomColor = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] };
+	std::fill_n(colors, vertexCount, vertexBottomColor);
+
+  const float step = (2.f*core::PI<float>()) / tesselation;
 
 	const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f);
 
-	//vertex positions
+	const auto apexVertexBase_i = tesselation;
+
 	for (uint32_t i = 0u; i < tesselation; i++)
 	{
 		core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f);
 		v *= radius;
 
-		memcpy(baseVertices[i].pos, v.pointer, sizeof(float) * 3);
-		memcpy(apexVertices[i].pos, apexVertexCoords.pointer, sizeof(float) * 3);
-	}
+		positions[i] = { v.x, v.y, v.z };
+		positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z };
 
-	//vertex normals
-	for (uint32_t i = 0; i < tesselation; i++)
-	{
-		const core::vectorSIMDf v0ToApex = apexVertexCoords - core::vectorSIMDf(vertices[i].pos[0], vertices[i].pos[1], vertices[i].pos[2]);
+		const auto simdPosition = core::vectorSIMDf(positions[i].x, positions[i].y, positions[i].z);
+		const core::vectorSIMDf v0ToApex = apexVertexCoords - simdPosition;
 
 		uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1;
-		core::vectorSIMDf u1 = core::vectorSIMDf(baseVertices[nextVertexIndex].pos[0], baseVertices[nextVertexIndex].pos[1], baseVertices[nextVertexIndex].pos[2]);
-		u1 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]);
+		core::vectorSIMDf u1 = core::vectorSIMDf(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z);
+		u1 -= simdPosition;
 		float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x);
 		u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight;
 
 		uint32_t prevVertexIndex = i == 0 ? (tesselation - 1) : i - 1;
-		core::vectorSIMDf u2 = core::vectorSIMDf(baseVertices[prevVertexIndex].pos[0], baseVertices[prevVertexIndex].pos[1], baseVertices[prevVertexIndex].pos[2]);
-		u2 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]);
+		core::vectorSIMDf u2 = core::vectorSIMDf(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z);
+		u2 -= simdPosition;
 		angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x);
 		u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight;
 
-		baseVertices[i].normal = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::normalize(u1 + u2));
-		apexVertices[i].normal = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::normalize(u1));
+
+		const auto baseNormal = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(u1 + u2));
+		memcpy(normals + i, &baseNormal, sizeof(baseNormal));
+
+		const auto apexNormal = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(u1));
+		memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal));
 	}
 
-	auto idxBuf = asset::ICPUBuffer::create({ 3u * tesselation * sizeof(uint16_t) });
-	uint16_t* indices = (uint16_t*)idxBuf->getPointer();
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
+	return retval;
+}
 
-	const uint32_t firstIndexOfBaseVertices = 0;
-	const uint32_t firstIndexOfApexVertices = tesselation;
-	for (uint32_t i = 0; i < tesselation; i++)
+core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow( // Builds one triangle-list geometry by concatenating a createCylinder() shaft with a createCone() head (positions, normals, UVs, colors, 16-bit indices).
+	const uint32_t tesselationCylinder, // tesselation forwarded to createCylinder() for the shaft
+	const uint32_t tesselationCone, // tesselation forwarded to createCone() for the head
+	const float height,
+	const float cylinderHeight,
+	const float width0,
+	const float width1,
+	const video::SColor vtxColor0,
+	const video::SColor vtxColor1
+) const
+{
+  assert(height > cylinderHeight);
+
+	using position_t = hlsl::float32_t3;
+	using normal_t = hlsl::vector<uint8_t, 4>;
+	using uv_t = hlsl::vector<uint8_t, 2>;
+	using color_t = hlsl::vector<uint8_t, 4>;
+
+  auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder, vtxColor0);
+  auto cone = createCone(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1);
+
+	auto cylinderPositions = reinterpret_cast<position_t*>(cylinder->getPositionView().src.buffer->getPointer());
+	auto conePositions = reinterpret_cast<position_t*>(cone->getPositionView().src.buffer->getPointer());
+
+	const auto cylinderNormals = reinterpret_cast<normal_t*>(cylinder->getNormalView().src.buffer->getPointer());
+	const auto coneNormals = reinterpret_cast<normal_t*>(cone->getNormalView().src.buffer->getPointer());
+
+	const auto cylinderUvs = reinterpret_cast<uv_t*>(cylinder->getAuxAttributeViews()->front().src.buffer->getPointer());
+	// The cone exposes no UV attribute, so nothing is fetched from it; cone vertices get a constant UV in the merge loop below.
+
+	const auto cylinderIndices = cylinder->getIndexView().src.buffer->getPointer();
+	const auto coneIndices = cone->getIndexView().src.buffer->getPointer();
+
+	const auto cylinderVertexCount = cylinder->getPositionView().getElementCount();
+	const auto coneVertexCount = cone->getPositionView().getElementCount();
+	const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount;
+
+	const auto cylinderIndexCount = cylinder->getVertexReferenceCount();
+	const auto coneIndexCount = cone->getVertexReferenceCount();
+	const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount;
+
+	using namespace hlsl;
+
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
+
+	// Create indices: cylinder indices first, then the cone indices rebased by the cylinder vertex count
+	using index_t = uint16_t;
 	{
-		indices[i * 3] = firstIndexOfApexVertices + i;
-		indices[(i * 3) + 1] = firstIndexOfBaseVertices + i;
-		indices[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
+		const auto bytesize = sizeof(index_t) * newArrowIndexCount;
+		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+		auto arrowIndices = reinterpret_cast<uint16_t*>(indices->getPointer());
+		auto newConeIndices = (arrowIndices + cylinderIndexCount);
+
+		memcpy(arrowIndices, cylinderIndices, sizeof(uint16_t) * cylinderIndexCount); // NOTE(review): assumes both source geometries store 16-bit indices - confirm against createCylinder/createCone
+		memcpy(newConeIndices, coneIndices, sizeof(uint16_t) * coneIndexCount);
+
+		for (auto i = 0ull; i < coneIndexCount; ++i)
+			*(newConeIndices + i) += cylinderVertexCount;
+
+		shapes::AABB<4,index_t> aabb;
+		aabb.minVx[0] = 0;
+		aabb.maxVx[0] = newArrowVertexCount - 1;
+		retval->setIndexView({
+			.composed = {
+				.encodedDataRange = {.u16=aabb},
+				.stride = sizeof(index_t),
+				.format = EF_R16_UINT,
+				.rangeFormat = IGeometryBase::EAABBFormat::U16
+			},
+			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
+		});
 	}
 
-	return_type cone;
+	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
 
-	constexpr size_t vertexSize = sizeof(ConeVertex);
-	cone.inputParams =
-	{ 0b111u,0b1u,
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
+  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 4>* colors;
+	hlsl::vector<uint8_t, 2>* uvs;
+  {
+    {
+      constexpr auto AttrSize = sizeof(decltype(*positions));
+      auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
+      positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
+      shapes::AABB<4, float32_t> aabb;
+			//TODO(kevyuu): Calculate arrow aabb
+      aabb.maxVx = hlsl::vector<float32_t,4>(127,127,127,0);
+      aabb.minVx = -aabb.maxVx;
+      retval->setPositionView({
+        .composed = {
+          .encodedDataRange = {.f32 = aabb},
+          .stride = AttrSize,
+          .format = EF_R32G32B32_SFLOAT,
+          .rangeFormat = IGeometryBase::EAABBFormat::F32
+        },
+        .src = {
+          .offset=0,
+          .size = buff->getSize(),
+          .buffer = std::move(buff),
+        }
+      });
+    }
+    {
+      constexpr auto AttrSize = sizeof(decltype(*normals));
+      auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
+      normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
+      shapes::AABB<4, int8_t> aabb;
+      aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+      aabb.minVx = -aabb.maxVx;
+      retval->setNormalView({
+        .composed = {
+          .encodedDataRange = {.s8=aabb},
+          .stride = AttrSize,
+          .format = NormalFormat,
+          .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+        },
+        .src = {
+          .offset = 0,
+          .size = buff->getSize(),
+          .buffer = std::move(buff)
+        }
+      });
+    }
 		{
-			{0u,EF_R32G32B32_SFLOAT,offsetof(ConeVertex,pos)},
-			{0u,EF_R8G8B8A8_UNORM,offsetof(ConeVertex,color)},
-			{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ConeVertex,normal)}
-		},
-		{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}
-	};
+			constexpr auto AttrSize = sizeof(decltype(*uvs));
+			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
+			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
+			shapes::AABB<4, uint8_t> aabb;
+			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
+			retval->getAuxAttributeViews()->push_back({
+				.composed = {
+					.encodedDataRange = {.u8=aabb},
+					.stride = AttrSize,
+					.format = EF_R8G8_UNORM,
+					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+				},
+				.src = {
+				  .offset = 0,
+				  .size = buff->getSize(),
+				  .buffer = std::move(buff),
+				}
+			});
+		}
+    {
+      constexpr auto AttrSize = sizeof(decltype(*colors));
+      auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
+      colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
+      shapes::AABB<4, uint8_t> aabb;
+      aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
+      aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
+      retval->getAuxAttributeViews()->push_back({
+        .composed = {
+          .encodedDataRange = {.u8=aabb},
+          .stride = AttrSize,
+          .format = EF_R8G8B8A8_UNORM,
+          .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
+        },
+        .src = {
+          .offset = 0,
+          .size = buff->getSize(),
+          .buffer = std::move(buff),
+        }
+      });
+    }
+  }
+  
+	for (auto i = 0ull; i < coneVertexCount; ++i)
+	{
+		auto& conePosition = conePositions[i];
+		core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z);
+		newPos.rotateYZByRAD(-1.5707963268); // -pi/2 in the YZ plane, applied in place to the cone's own position buffer; NOTE(review): coneNormals are copied below without the same rotation - confirm intended
 
-	vtxBuf->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	cone.bindings[0] = { 0, std::move(vtxBuf) };
-	idxBuf->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	cone.indexBuffer = { 0, std::move(idxBuf) };
-	cone.indexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t);
-	cone.indexType = EIT_16BIT;
+    conePosition = {newPos.x, newPos.y, newPos.z};
+	}
 
-    return cone;
+  uint8_t cylinderGlColor[4];
+  vtxColor0.toOpenGLColor(cylinderGlColor);
+
+  uint8_t coneGlColor[4];
+  vtxColor1.toOpenGLColor(coneGlColor);
+	
+	for (auto z = 0ull; z < newArrowVertexCount; ++z)
+	{
+		if (z < cylinderVertexCount)
+		{
+			positions[z] = cylinderPositions[z];
+			normals[z] = cylinderNormals[z];
+			uvs[z] = cylinderUvs[z];
+			colors[z] = { cylinderGlColor[0], cylinderGlColor[1], cylinderGlColor[2], cylinderGlColor[3] };
+		}
+		else
+		{
+			const auto cone_i = z - cylinderVertexCount;
+			positions[z] = conePositions[cone_i];
+			normals[z] = coneNormals[cone_i];
+			uvs[z] = { 0, 0 };
+			colors[z] = { coneGlColor[0], coneGlColor[1], coneGlColor[2], coneGlColor[3] };
+		}
+	}
+
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
+	return retval;
 }
-#endif
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const
 {

From 2d8b7c4918fd8c93ba86e0a4f23b32fef2cb8468 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Wed, 25 Jun 2025 18:29:46 +0700
Subject: [PATCH 02/40] Implement getIndexType convenience function for
 IPolygonGeometry

---
 include/nbl/asset/IPolygonGeometry.h | 40 +++++++++++++++-------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h
index 4d021c178c..97a6cda7d0 100644
--- a/include/nbl/asset/IPolygonGeometry.h
+++ b/include/nbl/asset/IPolygonGeometry.h
@@ -203,6 +203,26 @@ class IPolygonGeometry : public IIndexableGeometry<BufferType>, public IPolygonG
         // For User defined semantics
         inline const core::vector<SDataView>& getAuxAttributeViews() const {return m_auxAttributeViews;}
 
+        inline E_INDEX_TYPE getIndexType() const // Maps the index view's format to an E_INDEX_TYPE; yields EIT_UNKNOWN when the index view evaluates false or uses any other format.
+        {
+            auto indexType = EIT_UNKNOWN;
+            // only EF_R16_UINT and EF_R32_UINT are recognised, every other format leaves EIT_UNKNOWN
+            if (base_t::m_indexView)
+            {
+                switch (base_t::m_indexView.composed.format)
+                {
+                    case EF_R16_UINT:
+                        indexType = EIT_16BIT;
+                        break;
+                    case EF_R32_UINT: // no [[fallthrough]] here: the attribute is only valid directly before another case/default label
+                        indexType = EIT_32BIT;
+                        break;
+                    default:
+                        break;
+                }
+            }
+            return indexType;
+        }
 
         // Does not set the `transform` or `geometryFlags` fields, because it doesn't care about it.
         // Also won't set second set of vertex data, opacity mipmaps, etc.
@@ -212,30 +232,12 @@ class IPolygonGeometry : public IIndexableGeometry<BufferType>, public IPolygonG
             // must be a triangle list, but don't want to compare pointers
             if (m_indexing && m_indexing->knownTopology()==EPT_TRIANGLE_LIST)// && m_indexing->degree() == TriangleList()->degree() && m_indexing->rate() == TriangleList->rate())
             {
-                auto indexType = EIT_UNKNOWN;
-                // disallowed index format
-                if (base_t::m_indexView)
-                {
-                    switch (base_t::m_indexView.composed.format)
-                    {
-                        case EF_R16_UINT:
-                            indexType = EIT_16BIT;
-                            break;
-                        case EF_R32_UINT: [[fallthrough]];
-                            indexType = EIT_32BIT;
-                            break;
-                        default:
-                            break;
-                    }
-                    if (indexType==EIT_UNKNOWN)
-                        return retval;
-                }
                 retval.vertexData[0] = base_t::m_positionView.src;
                 retval.indexData = base_t::m_indexView.src;
                 retval.maxVertex = base_t::m_positionView.getElementCount() - 1;
                 retval.vertexStride = base_t::m_positionView.composed.getStride();
                 retval.vertexFormat = base_t::m_positionView.composed.format;
-                retval.indexType = indexType;
+                retval.indexType = getIndexType();
             }
             return retval;
         }

From cdcaae9008dbda95bf5e7241fd3da13415f411db Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sat, 28 Jun 2025 21:51:50 +0700
Subject: [PATCH 03/40] Implement createIcosphere

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 140 +++++++++++++++++------
 1 file changed, 106 insertions(+), 34 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index c25a222a53..775d2e2061 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -1211,7 +1211,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 	return retval;
 }
 
-#if 0
 /*
 	Helpful Icosphere class implementation used to compute
 	and create icopshere's vertices and indecies.
@@ -1224,6 +1223,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 class Icosphere
 {
 public:
+	using index_t = unsigned int;
+
 	Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32)
 	{
 		if (smooth)
@@ -1234,27 +1235,27 @@ class Icosphere
 
 	~Icosphere() {}
 
-	unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; }
+	unsigned int getPositionCount() const { return (unsigned int)vertices.size() / 3; }
 	unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; }
 	unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; }
 	unsigned int getIndexCount() const { return (unsigned int)indices.size(); }
 	unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); }
 	unsigned int getTriangleCount() const { return getIndexCount() / 3; }
 
-	unsigned int getVertexSize() const { return (unsigned int)vertices.size() * sizeof(float); }   // # of bytes
+	unsigned int getPositionSize() const { return (unsigned int)vertices.size() * sizeof(float); }   // # of bytes
 	unsigned int getNormalSize() const { return (unsigned int)normals.size() * sizeof(float); }
 	unsigned int getTexCoordSize() const { return (unsigned int)texCoords.size() * sizeof(float); }
-	unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(unsigned int); }
+	unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(index_t); }
 	unsigned int getLineIndexSize() const { return (unsigned int)lineIndices.size() * sizeof(unsigned int); }
 
-	const float* getVertices() const { return vertices.data(); }
+	const float* getPositions() const { return vertices.data(); }
 	const float* getNormals() const { return normals.data(); }
 	const float* getTexCoords() const { return texCoords.data(); }
 	const unsigned int* getIndices() const { return indices.data(); }
 	const unsigned int* getLineIndices() const { return lineIndices.data(); }
 
 	// for interleaved vertices: V/N/T
-	unsigned int getInterleavedVertexCount() const { return getVertexCount(); }    // # of vertices
+	unsigned int getInterleavedVertexCount() const { return getPositionCount(); }    // # of vertices
 	unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); }    // # of bytes
 	int getInterleavedStride() const { return interleavedStride; }   // should be 32 bytes
 	const float* getInterleavedVertices() const { return interleavedVertices.data(); }
@@ -2178,38 +2179,109 @@ class Icosphere
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(float radius, uint32_t subdivision, bool smooth) const
 {
-	Icosphere IcosphereData(radius, subdivision, smooth);
-	
-	return_type icosphereGeometry;
 
-	constexpr size_t vertexSize = sizeof(IcosphereVertex);
+	Icosphere icosphere(radius, subdivision, smooth); // helper supplies the positions, normals, tex coords and indices copied into the views below
+
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
+
+	using namespace hlsl;
+
+	// Create indices; the buffer carries the index usage flag, as the code this replaces did via addUsageFlags
+	{
+    auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize(), IBuffer::EUF_INDEX_BUFFER_BIT });
+    memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize());
+
+		shapes::AABB<4,Icosphere::index_t> aabb;
+		aabb.minVx[0] = 0;
+		aabb.maxVx[0] = icosphere.getPositionCount() - 1;
+
+		static_assert(sizeof(Icosphere::index_t) == 2 || sizeof(Icosphere::index_t) == 4);
+		const auto isIndex16Bit = sizeof(Icosphere::index_t) == 2;
+
+		retval->setIndexView({
+			.composed = {
+				.encodedDataRange = {.u32=aabb},
+				.stride = sizeof(Icosphere::index_t),
+				.format = isIndex16Bit ? EF_R16_UINT : EF_R32_UINT,
+				.rangeFormat = isIndex16Bit? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32
+			},
+			.src = {.offset=0,.size=icosphere.getIndexSize(),.buffer = std::move(indexBuffer)}
+		});
+	}
 
-	icosphereGeometry.inputParams =
-	{ 0b111u,0b1u,
+	{
 		{
-			{0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,pos)},
-			{0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,normals)},
-			{0u, EF_R32G32_SFLOAT, offsetof(IcosphereVertex,uv)}
-		},
-		{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} 
-	};
-
-	auto vertexBuffer = asset::ICPUBuffer::create({ IcosphereData.getInterleavedVertexSize() });
-	auto indexBuffer = asset::ICPUBuffer::create({ IcosphereData.getIndexSize() });
-
-	memcpy(vertexBuffer->getPointer(), IcosphereData.getInterleavedVertices(), vertexBuffer->getSize());
-	memcpy(indexBuffer->getPointer(), IcosphereData.getIndices(), indexBuffer->getSize());
-
-	vertexBuffer->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	icosphereGeometry.bindings[0] = { 0, std::move(vertexBuffer) };
-	indexBuffer->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	icosphereGeometry.indexBuffer = { 0, std::move(indexBuffer) };
-	icosphereGeometry.indexCount = IcosphereData.getIndexCount();
-	icosphereGeometry.indexType = EIT_32BIT;
-
-	return icosphereGeometry;
+			using position_t = float32_t3;
+			constexpr auto AttrSize = sizeof(position_t);
+			auto buff = ICPUBuffer::create({ icosphere.getPositionCount() * AttrSize, IBuffer::EUF_NONE });
+			const auto positions = reinterpret_cast<position_t*>(buff->getPointer());
+			memcpy(positions, icosphere.getPositions(), icosphere.getPositionSize());
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, radius, 0.f);
+			aabb.minVx = -aabb.maxVx;
+			retval->setPositionView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
+				}
+			});
+		}
+    {
+			using normal_t = float32_t3;
+			constexpr auto AttrSize = sizeof(normal_t);
+			auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE});
+			const auto normals = reinterpret_cast<normal_t*>(buff->getPointer());
+			memcpy(normals, icosphere.getNormals(), icosphere.getNormalSize());
+			shapes::AABB<4,float32_t> aabb;
+			aabb.maxVx = float32_t4(1, 1, 1, 0.f);
+			aabb.minVx = -aabb.maxVx;
+			retval->setNormalView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)},
+			});
+    }
+    {
+			using uv_t = uint32_t;
+			constexpr auto AttrSize = sizeof(uv_t);
+			auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE});
+			const auto uvs = reinterpret_cast<uv_t*>(buff->getPointer());
+			shapes::AABB<4, uint16_t> aabb;
+			aabb.minVx = uint16_t4(0,0,0,0);
+			aabb.maxVx = uint16_t4(0xFFFF,0xFFFF,0,0);
+			retval->getAuxAttributeViews()->push_back({
+				.composed = {
+					.encodedDataRange = {.u16=aabb},
+					.stride = AttrSize,
+					.format = EF_R16G16_UNORM,
+					.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM
+				},
+				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
+			});
+			const auto texCoords = icosphere.getTexCoords(); // loop-invariant pointer to interleaved (u,v) floats, fetched once
+			for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++)
+			{
+				const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] };
+				uvs[uv_i] = packUnorm2x16(f32_uv); // one uint32_t per vertex for the EF_R16G16_UNORM view; presumably two 16-bit UNORM halves - confirm packUnorm2x16
+			}
+    }
+	}
+
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
+	return retval;
 }
-#endif
 
 } // end namespace nbl::asset
 

From 2e063d73bc98248fa874187bb306729a2f06485b Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sat, 28 Jun 2025 21:52:18 +0700
Subject: [PATCH 04/40] Remove color parameter from create<Geometry>

---
 include/nbl/asset/utils/CGeometryCreator.h |   7 +-
 src/nbl/asset/utils/CGeometryCreator.cpp   | 124 +--------------------
 2 files changed, 6 insertions(+), 125 deletions(-)

diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h
index 87d7a0ef5e..ca6fff1790 100644
--- a/include/nbl/asset/utils/CGeometryCreator.h
+++ b/include/nbl/asset/utils/CGeometryCreator.h
@@ -61,8 +61,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createArrow(const uint32_t tesselationCylinder = 4,
 				const uint32_t tesselationCone = 8, const float height = 1.f,
 				const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f,
-				const float widthCone = 0.3f, const video::SColor colorCylinder = 0xFFFFFFFF,
-				const video::SColor colorCone = 0xFFFFFFFF) const;
+				const float widthCone = 0.3f) const;
 
 
 		//! Create a sphere mesh.
@@ -87,7 +86,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		*/
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createCylinder(float radius, float length,
 				uint32_t tesselation,
-				const video::SColor& color=video::SColor(0xffffffff), CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
+				CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
 
 		//! Create a cone mesh.
 		/**
@@ -100,8 +99,6 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\return Generated mesh.
 		*/
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createCone(float radius, float length, uint32_t tesselation,
-				const video::SColor& colorTop=video::SColor(0xffffffff),
-				const video::SColor& colorBottom=video::SColor(0xffffffff),
 				float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
 
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const;
diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 775d2e2061..c012df8826 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -295,7 +295,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 	hlsl::float32_t3* positions;
   hlsl::vector<uint8_t, 4>* normals;
 	hlsl::vector<uint8_t, 2>* uvs;
-	hlsl::vector<uint8_t, 4>* colors;
 	{
 		{
 			constexpr auto AttrSize = sizeof(decltype(*positions));
@@ -360,36 +359,10 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 				}
 			});
 		}
-		{
-			constexpr auto AttrSize = sizeof(decltype(*colors));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
-			shapes::AABB<4, uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8B8A8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
-				}
-			});
-		}
 	}
 
 	// fill vertices
 	{
-		for (size_t i = 0; i < vertexCount; i++)
-		{
-			colors[i] = { 255,255,255,255 };
-		}
-
 		// calculate the angle which separates all points in a circle
 		const float AngleX = 2 * core::PI<float>() / polyCountX;
 		const float AngleY = core::PI<float>() / polyCountY;
@@ -470,7 +443,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	float radius, float length,
-	uint32_t tesselation, const video::SColor& color, CQuantNormalCache* const quantNormalCacheOverride) const
+	uint32_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const
 {
 	using namespace hlsl;
 
@@ -521,7 +494,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	hlsl::float32_t3* positions;
   hlsl::vector<uint8_t, 4>* normals;
 	hlsl::vector<uint8_t, 2>* uvs;
-	hlsl::vector<uint8_t, 4>* colors;
 	{
 		{
 			constexpr auto AttrSize = sizeof(decltype(*positions));
@@ -586,32 +558,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 				}
 			});
 		}
-		{
-			constexpr auto AttrSize = sizeof(decltype(*colors));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
-			shapes::AABB<4, uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8B8A8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
-				}
-			});
-		}
 	}
 
-  uint8_t glcolor[4];
-  color.toOpenGLColor(glcolor);
-
   const float tesselationRec = core::reciprocal_approxim<float>(static_cast<float>(tesselation));
   const float step = 2.f * core::PI<float>() * tesselationRec;
   for (uint32_t i = 0u; i < tesselation; ++i)
@@ -624,12 +572,10 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
     positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));
 		uvs[i] = { f_i * tesselationRec, 0.0 };
-		colors[i] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] };
 
     positions[i + halfIx] = { p.x, p.y, length };
     normals[i + halfIx] = normals[i];
     uvs[i + halfIx] = { 1.0f, 0.0f };
-		colors[i + halfIx] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] };
   }
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
@@ -638,8 +584,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
   float radius, float length, uint32_t tesselation,
-  const video::SColor& colorTop,
-  const video::SColor& colorBottom,
   float oblique, CQuantNormalCache* const quantNormalCacheOverride) const
 {
 
@@ -689,7 +633,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
   hlsl::vector<uint8_t, 4>* normals;
-	hlsl::vector<uint8_t, 4>* colors;
   {
     {
       constexpr auto AttrSize = sizeof(decltype(*positions));
@@ -733,34 +676,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
         }
       });
     }
-    {
-      constexpr auto AttrSize = sizeof(decltype(*colors));
-      auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-      colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
-      shapes::AABB<4, uint8_t> aabb;
-      aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-      aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-      retval->getAuxAttributeViews()->push_back({
-        .composed = {
-          .encodedDataRange = {.u8=aabb},
-          .stride = AttrSize,
-          .format = EF_R8G8B8A8_UNORM,
-          .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-        },
-        .src = {
-          .offset = 0,
-          .size = buff->getSize(),
-          .buffer = std::move(buff),
-        }
-      });
-    }
   }
 
-  uint8_t glcolor[4];
-  colorBottom.toOpenGLColor(glcolor);
-	vector<uint8_t, 4> vertexBottomColor = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] };
-	std::fill_n(colors, vertexCount, vertexBottomColor);
-
   const float step = (2.f*core::PI<float>()) / tesselation;
 
 	const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f);
@@ -808,9 +725,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 	const float height,
 	const float cylinderHeight,
 	const float width0,
-	const float width1,
-	const video::SColor vtxColor0,
-	const video::SColor vtxColor1
+	const float width1
 ) const
 {
   assert(height > cylinderHeight);
@@ -818,10 +733,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 	using position_t = hlsl::float32_t3;
 	using normal_t = hlsl::vector<uint8_t, 4>;
 	using uv_t = hlsl::vector<uint8_t, 2>;
-	using color_t = hlsl::vector<uint8_t, 4>;
 
-  auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder, vtxColor0);
-  auto cone = createCone(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1);
+  auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder);
+  auto cone = createCone(width1, height-cylinderHeight, tesselationCone);
 
 	auto cylinderPositions = reinterpret_cast<position_t*>(cylinder->getPositionView().src.buffer->getPointer());
 	auto conePositions = reinterpret_cast<position_t*>(cone->getPositionView().src.buffer->getPointer());
@@ -881,7 +795,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
   hlsl::vector<uint8_t, 4>* normals;
-	hlsl::vector<uint8_t, 4>* colors;
 	hlsl::vector<uint8_t, 2>* uvs;
   {
     {
@@ -948,27 +861,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 				}
 			});
 		}
-    {
-      constexpr auto AttrSize = sizeof(decltype(*colors));
-      auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
-      colors = reinterpret_cast<decltype(colors)>(buff->getPointer());
-      shapes::AABB<4, uint8_t> aabb;
-      aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-      aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-      retval->getAuxAttributeViews()->push_back({
-        .composed = {
-          .encodedDataRange = {.u8=aabb},
-          .stride = AttrSize,
-          .format = EF_R8G8B8A8_UNORM,
-          .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-        },
-        .src = {
-          .offset = 0,
-          .size = buff->getSize(),
-          .buffer = std::move(buff),
-        }
-      });
-    }
   }
   
 	for (auto i = 0ull; i < coneVertexCount; ++i)
@@ -980,12 +872,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
     conePosition = {newPos.x, newPos.y, newPos.z};
 	}
 
-  uint8_t cylinderGlColor[4];
-  vtxColor0.toOpenGLColor(cylinderGlColor);
-
-  uint8_t coneGlColor[4];
-  vtxColor1.toOpenGLColor(coneGlColor);
-	
 	for (auto z = 0ull; z < newArrowVertexCount; ++z)
 	{
 		if (z < cylinderVertexCount)
@@ -993,7 +879,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 			positions[z] = cylinderPositions[z];
 			normals[z] = cylinderNormals[z];
 			uvs[z] = cylinderUvs[z];
-			colors[z] = { cylinderGlColor[0], cylinderGlColor[1], cylinderGlColor[2], cylinderGlColor[3] };
 		}
 		else
 		{
@@ -1001,7 +886,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 			positions[z] = conePositions[cone_i];
 			normals[z] = coneNormals[cone_i];
 			uvs[z] = { 0, 0 };
-			colors[z] = { coneGlColor[0], coneGlColor[1], coneGlColor[2], coneGlColor[3] };
 		}
 	}
 

From f0b50642ad95492ba1490ce5884c0d2774edf311 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sat, 28 Jun 2025 21:57:27 +0700
Subject: [PATCH 05/40] Fix indentation

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 430 +++++++++++------------
 1 file changed, 215 insertions(+), 215 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index c012df8826..88e93e4d2c 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -204,7 +204,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		polyCountY = 2;
 
 	const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level
-  const size_t vertexCount = (polyCountXPitch * polyCountY) + 2;
+	const size_t vertexCount = (polyCountXPitch * polyCountY) + 2;
 
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
@@ -212,11 +212,11 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 	// Create indices
 	using index_t = uint32_t;
 	{
-    const auto indexCount = (polyCountX * polyCountY) * 6;
+		const auto indexCount = (polyCountX * polyCountY) * 6;
 		const auto bytesize = sizeof(index_t) * indexCount;
 		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
 		auto indexPtr = reinterpret_cast<index_t*>(indices->getPointer());
-    uint32_t level = 0;
+		uint32_t level = 0;
 		size_t indexAddIx = 0;
 		for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1)
 		{
@@ -293,7 +293,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 4>* normals;
 	hlsl::vector<uint8_t, 2>* uvs;
 	{
 		{
@@ -311,9 +311,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					.rangeFormat = IGeometryBase::EAABBFormat::F32
 				},
 				.src = {
-				  .offset=0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
+					.offset=0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
 				}
 			});
 		}
@@ -332,9 +332,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
 				},
 				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff)
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff)
 				}
 			});
 		}
@@ -353,9 +353,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
 				},
 				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
 				}
 			});
 		}
@@ -373,68 +373,68 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 		double ay = 0;//AngleY / 2;
 		auto vertex_i = 0;
-    for (uint32_t y = 0; y < polyCountY; ++y)
-    {
-      ay += AngleY;
-      const double sinay = sin(ay);
-      axz = 0;
+		for (uint32_t y = 0; y < polyCountY; ++y)
+		{
+			ay += AngleY;
+			const double sinay = sin(ay);
+			axz = 0;
 
-      // calculate the necessary vertices without the doubled one
+			// calculate the necessary vertices without the doubled one
 			const auto old_vertex_i = vertex_i;
-      for (uint32_t xz = 0; xz < polyCountX; ++xz)
-      {
-        // calculate points position
-
-        float32_t3 pos(static_cast<float>(cos(axz) * sinay),
-          static_cast<float>(cos(ay)),
-          static_cast<float>(sin(axz) * sinay));
-        // for spheres the normal is the position
-        core::vectorSIMDf normal(&pos.x);
-        normal.makeSafe3D();
-        const auto quantizedNormal = quantNormalCache->quantize<NormalCacheFormat>(normal);
-        pos *= radius;
-
-        // calculate texture coordinates via sphere mapping
-        // tu is the same on each level, so only calculate once
-        float tu = 0.5f;
-        //if (y==0)
-        //{
-        if (normal.Y != -1.0f && normal.Y != 1.0f)
-          tu = static_cast<float>(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
-        if (normal.Z < 0.0f)
-          tu = 1 - tu;
-        //}
-        //else
-          //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
+			for (uint32_t xz = 0; xz < polyCountX; ++xz)
+			{
+				// calculate points position
+
+				float32_t3 pos(static_cast<float>(cos(axz) * sinay),
+					static_cast<float>(cos(ay)),
+					static_cast<float>(sin(axz) * sinay));
+				// for spheres the normal is the position
+				core::vectorSIMDf normal(&pos.x);
+				normal.makeSafe3D();
+				const auto quantizedNormal = quantNormalCache->quantize<NormalCacheFormat>(normal);
+				pos *= radius;
+
+				// calculate texture coordinates via sphere mapping
+				// tu is the same on each level, so only calculate once
+				float tu = 0.5f;
+				//if (y==0)
+				//{
+				if (normal.Y != -1.0f && normal.Y != 1.0f)
+					tu = static_cast<float>(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
+				if (normal.Z < 0.0f)
+					tu = 1 - tu;
+				//}
+				//else
+					//tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
 
 				positions[vertex_i] = pos;
 				uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast<float>(ay * core::RECIPROCAL_PI<double>())) };
 				memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal));
 
 				vertex_i++;
-        axz += AngleX;
-      }
-      // This is the doubled vertex on the initial position
+				axz += AngleX;
+			}
+			// This is the doubled vertex on the initial position
 
-      positions[vertex_i] = positions[old_vertex_i];
+			positions[vertex_i] = positions[old_vertex_i];
 			uvs[vertex_i] = { 127, uvs[old_vertex_i].y };
 			normals[vertex_i] = normals[old_vertex_i];
 
 			vertex_i++;
-    }
+		}
 
 		// the vertex at the top of the sphere
 		positions[vertex_i] = { 0.f, radius, 0.f };
 		uvs[vertex_i] = { 0, 63};
 		const auto quantizedTopNormal = quantNormalCache->quantize<NormalCacheFormat>(core::vectorSIMDf(0.f, 1.f, 0.f));
-    memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal));
+		memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal));
 
 		// the vertex at the bottom of the sphere
 		vertex_i++;
 		positions[vertex_i] = { 0.f, -radius, 0.f };
 		uvs[vertex_i] = { 63, 127};
 		const auto quantizedBottomNormal = quantNormalCache->quantize<NormalCacheFormat>(core::vectorSIMDf(0.f, -1.f, 0.f));
-    memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal));
+		memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal));
 	}
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
@@ -449,8 +449,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
-  const uint16_t halfIx = static_cast<uint16_t>(tesselation);
-  const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
+	const uint16_t halfIx = static_cast<uint16_t>(tesselation);
+	const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
 
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
@@ -458,20 +458,20 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	// Create indices
 	using index_t = uint16_t;
 	{
-    constexpr uint32_t RowCount = 2u;
-    const auto IndexCount = RowCount * 3 * tesselation;
+		constexpr uint32_t RowCount = 2u;
+		const auto IndexCount = RowCount * 3 * tesselation;
 		const auto bytesize = sizeof(index_t) * IndexCount;
 		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
 		auto u = reinterpret_cast<index_t*>(indices->getPointer());
-    for (uint16_t i = 0u, j = 0u; i < halfIx; ++i)
-    {
-      u[j++] = i;
-      u[j++] = (i + 1u) != halfIx ? (i + 1u):0u;
-      u[j++] = i + halfIx;
-      u[j++] = i + halfIx;
-      u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u;
-      u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx;
-    }
+		for (uint16_t i = 0u, j = 0u; i < halfIx; ++i)
+		{
+			u[j++] = i;
+			u[j++] = (i + 1u) != halfIx ? (i + 1u):0u;
+			u[j++] = i + halfIx;
+			u[j++] = i + halfIx;
+			u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u;
+			u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx;
+		}
 
 		shapes::AABB<4,index_t> aabb;
 		aabb.minVx[0] = 0;
@@ -492,7 +492,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 4>* normals;
 	hlsl::vector<uint8_t, 2>* uvs;
 	{
 		{
@@ -510,9 +510,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 					.rangeFormat = IGeometryBase::EAABBFormat::F32
 				},
 				.src = {
-				  .offset=0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
+					.offset=0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
 				}
 			});
 		}
@@ -531,9 +531,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
 				},
 				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff)
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff)
 				}
 			});
 		}
@@ -552,46 +552,46 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
 				},
 				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
 				}
 			});
 		}
 	}
 
-  const float tesselationRec = core::reciprocal_approxim<float>(static_cast<float>(tesselation));
-  const float step = 2.f * core::PI<float>() * tesselationRec;
-  for (uint32_t i = 0u; i < tesselation; ++i)
-  {
+	const float tesselationRec = core::reciprocal_approxim<float>(static_cast<float>(tesselation));
+	const float step = 2.f * core::PI<float>() * tesselationRec;
+	for (uint32_t i = 0u; i < tesselation; ++i)
+	{
 		const auto f_i = static_cast<float>(i);
-    core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
-    p *= radius;
-    const auto n = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(p));
+		core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
+		p *= radius;
+		const auto n = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(p));
 
-    positions[i] = { p.x, p.y, p.z };
+		positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));
 		uvs[i] = { f_i * tesselationRec, 0.0 };
 
-    positions[i + halfIx] = { p.x, p.y, length };
-    normals[i + halfIx] = normals[i];
-    uvs[i + halfIx] = { 1.0f, 0.0f };
-  }
+		positions[i + halfIx] = { p.x, p.y, length };
+		normals[i + halfIx] = normals[i];
+		uvs[i + halfIx] = { 1.0f, 0.0f };
+	}
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
-  float radius, float length, uint32_t tesselation,
-  float oblique, CQuantNormalCache* const quantNormalCacheOverride) const
+	float radius, float length, uint32_t tesselation,
+	float oblique, CQuantNormalCache* const quantNormalCacheOverride) const
 {
 
 	using namespace hlsl;
 
 	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
-  const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
+	const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
 
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
@@ -599,19 +599,19 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	// Create indices
 	using index_t = uint16_t;
 	{
-    constexpr uint32_t RowCount = 2u;
-    const auto IndexCount = 3 * tesselation;
+		constexpr uint32_t RowCount = 2u;
+		const auto IndexCount = 3 * tesselation;
 		const auto bytesize = sizeof(index_t) * IndexCount;
 		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
 		auto u = reinterpret_cast<index_t*>(indices->getPointer());
-    const uint32_t firstIndexOfBaseVertices = 0;
-    const uint32_t firstIndexOfApexVertices = tesselation;
-    for (uint32_t i = 0; i < tesselation; i++)
-    {
-      u[i * 3] = firstIndexOfApexVertices + i;
-      u[(i * 3) + 1] = firstIndexOfBaseVertices + i;
-      u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
-    }
+		const uint32_t firstIndexOfBaseVertices = 0;
+		const uint32_t firstIndexOfApexVertices = tesselation;
+		for (uint32_t i = 0; i < tesselation; i++)
+		{
+			u[i * 3] = firstIndexOfApexVertices + i;
+			u[(i * 3) + 1] = firstIndexOfBaseVertices + i;
+			u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
+		}
 
 		shapes::AABB<4,index_t> aabb;
 		aabb.minVx[0] = 0;
@@ -632,53 +632,53 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-  hlsl::vector<uint8_t, 4>* normals;
-  {
-    {
-      constexpr auto AttrSize = sizeof(decltype(*positions));
-      auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-      positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
-      shapes::AABB<4, float32_t> aabb;
-      aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
-      aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
-      retval->setPositionView({
-        .composed = {
-          .encodedDataRange = {.f32 = aabb},
-          .stride = AttrSize,
-          .format = EF_R32G32B32_SFLOAT,
-          .rangeFormat = IGeometryBase::EAABBFormat::F32
-        },
-        .src = {
-          .offset=0,
-          .size = buff->getSize(),
-          .buffer = std::move(buff),
-        }
-      });
-    }
-    {
-      constexpr auto AttrSize = sizeof(decltype(*normals));
-      auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-      normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
-      shapes::AABB<4, int8_t> aabb;
-      aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
-      aabb.minVx = -aabb.maxVx;
-      retval->setNormalView({
-        .composed = {
-          .encodedDataRange = {.s8=aabb},
-          .stride = AttrSize,
-          .format = NormalFormat,
-          .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-        },
-        .src = {
-          .offset = 0,
-          .size = buff->getSize(),
-          .buffer = std::move(buff)
-        }
-      });
-    }
-  }
-
-  const float step = (2.f*core::PI<float>()) / tesselation;
+	hlsl::vector<uint8_t, 4>* normals;
+	{
+		{
+			constexpr auto AttrSize = sizeof(decltype(*positions));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
+			aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
+			retval->setPositionView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {
+					.offset=0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
+				}
+			});
+		}
+		{
+			constexpr auto AttrSize = sizeof(decltype(*normals));
+			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
+			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
+			shapes::AABB<4, int8_t> aabb;
+			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.minVx = -aabb.maxVx;
+			retval->setNormalView({
+				.composed = {
+					.encodedDataRange = {.s8=aabb},
+					.stride = AttrSize,
+					.format = NormalFormat,
+					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+				},
+				.src = {
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff)
+				}
+			});
+		}
+	}
+
+	const float step = (2.f*core::PI<float>()) / tesselation;
 
 	const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f);
 
@@ -728,14 +728,14 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 	const float width1
 ) const
 {
-  assert(height > cylinderHeight);
+	assert(height > cylinderHeight);
 
 	using position_t = hlsl::float32_t3;
 	using normal_t = hlsl::vector<uint8_t, 4>;
 	using uv_t = hlsl::vector<uint8_t, 2>;
 
-  auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder);
-  auto cone = createCone(width1, height-cylinderHeight, tesselationCone);
+	auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder);
+	auto cone = createCone(width1, height-cylinderHeight, tesselationCone);
 
 	auto cylinderPositions = reinterpret_cast<position_t*>(cylinder->getPositionView().src.buffer->getPointer());
 	auto conePositions = reinterpret_cast<position_t*>(cone->getPositionView().src.buffer->getPointer());
@@ -794,52 +794,52 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-  hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<uint8_t, 4>* normals;
 	hlsl::vector<uint8_t, 2>* uvs;
-  {
-    {
-      constexpr auto AttrSize = sizeof(decltype(*positions));
-      auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
-      positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
-      shapes::AABB<4, float32_t> aabb;
+	{
+		{
+			constexpr auto AttrSize = sizeof(decltype(*positions));
+			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
+			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
+			shapes::AABB<4, float32_t> aabb;
 			//TODO(kevyuu): Calculate arrow aabb
-      aabb.maxVx = hlsl::vector<float32_t,4>(127,127,127,0);
-      aabb.minVx = -aabb.maxVx;
-      retval->setPositionView({
-        .composed = {
-          .encodedDataRange = {.f32 = aabb},
-          .stride = AttrSize,
-          .format = EF_R32G32B32_SFLOAT,
-          .rangeFormat = IGeometryBase::EAABBFormat::F32
-        },
-        .src = {
-          .offset=0,
-          .size = buff->getSize(),
-          .buffer = std::move(buff),
-        }
-      });
-    }
-    {
-      constexpr auto AttrSize = sizeof(decltype(*normals));
-      auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
-      normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
-      shapes::AABB<4, int8_t> aabb;
-      aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
-      aabb.minVx = -aabb.maxVx;
-      retval->setNormalView({
-        .composed = {
-          .encodedDataRange = {.s8=aabb},
-          .stride = AttrSize,
-          .format = NormalFormat,
-          .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-        },
-        .src = {
-          .offset = 0,
-          .size = buff->getSize(),
-          .buffer = std::move(buff)
-        }
-      });
-    }
+			aabb.maxVx = hlsl::vector<float32_t,4>(127,127,127,0);
+			aabb.minVx = -aabb.maxVx;
+			retval->setPositionView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {
+					.offset=0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
+				}
+			});
+		}
+		{
+			constexpr auto AttrSize = sizeof(decltype(*normals));
+			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
+			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
+			shapes::AABB<4, int8_t> aabb;
+			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.minVx = -aabb.maxVx;
+			retval->setNormalView({
+				.composed = {
+					.encodedDataRange = {.s8=aabb},
+					.stride = AttrSize,
+					.format = NormalFormat,
+					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+				},
+				.src = {
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff)
+				}
+			});
+		}
 		{
 			constexpr auto AttrSize = sizeof(decltype(*uvs));
 			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
@@ -855,21 +855,21 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
 				},
 				.src = {
-				  .offset = 0,
-				  .size = buff->getSize(),
-				  .buffer = std::move(buff),
+					.offset = 0,
+					.size = buff->getSize(),
+					.buffer = std::move(buff),
 				}
 			});
 		}
-  }
-  
+	}
+	
 	for (auto i = 0ull; i < coneVertexCount; ++i)
 	{
 		auto& conePosition = conePositions[i];
 		core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z);
 		newPos.rotateYZByRAD(-1.5707963268);
 
-    conePosition = {newPos.x, newPos.y, newPos.z};
+		conePosition = {newPos.x, newPos.y, newPos.z};
 	}
 
 	for (auto z = 0ull; z < newArrowVertexCount; ++z)
@@ -1100,8 +1100,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 	and create icopshere's vertices and indecies.
 
 	Polyhedron subdividing icosahedron (20 tris) by N-times iteration
-    The icosphere with N=1 (default) has 80 triangles by subdividing a triangle
-    of icosahedron into 4 triangles. If N=0, it is identical to icosahedron.
+		The icosphere with N=1 (default) has 80 triangles by subdividing a triangle
+		of icosahedron into 4 triangles. If N=0, it is identical to icosahedron.
 */
 
 class Icosphere
@@ -1240,14 +1240,14 @@ class Icosphere
 		texture coordinate is shared or no. If it is on the line segments, it is also
 		non-shared point
 
-		   00  01  02  03  04         
-		   /\  /\  /\  /\  /\         
-		  /  \/  \/  \/  \/  \        
+			 00  01  02  03  04         
+			 /\  /\  /\  /\  /\         
+			/  \/  \/  \/  \/  \        
 		 05  06  07  08  09   \       
-		   \   10  11  12  13  14     
+			 \   10  11  12  13  14     
 			\  /\  /\  /\  /\  /      
 			 \/  \/  \/  \/  \/       
-			  15  16  17  18  19      
+				15  16  17  18  19      
 	*/
 
 	static inline bool isSharedTexCoord(const float t[2])
@@ -1813,7 +1813,7 @@ class Icosphere
 				 v1           
 				/ \           
 		 newV1 *---* newV3    
-			  / \ / \         
+				/ \ / \         
 			v2---*---v3       
 				newV2         
 	*/
@@ -1979,8 +1979,8 @@ class Icosphere
 		add 7 sub edge lines per triangle to array using 6 indices (CCW)           
 			 i1                                                                     
 			 /            : (i1, i2)                                                
-		   i2---i6        : (i2, i6)												  
-		   / \  /         : (i2, i3), (i2, i4), (i6, i4)							  
+			 i2---i6        : (i2, i6)												  
+			 / \  /         : (i2, i3), (i2, i4), (i6, i4)							  
 		 i3---i4---i5     : (i3, i4), (i4, i5)									  
 	*/
 
@@ -2073,8 +2073,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 
 	// Create indices
 	{
-    auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() });
-    memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize());
+		auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() });
+		memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize());
 
 		shapes::AABB<4,Icosphere::index_t> aabb;
 		aabb.minVx[0] = 0;
@@ -2118,7 +2118,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 				}
 			});
 		}
-    {
+		{
 			using normal_t = float32_t3;
 			constexpr auto AttrSize = sizeof(normal_t);
 			auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE});
@@ -2136,8 +2136,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 				},
 				.src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)},
 			});
-    }
-    {
+		}
+		{
 			using uv_t = uint32_t;
 			constexpr auto AttrSize = sizeof(uv_t);
 			auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE});
@@ -2160,7 +2160,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 				const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] };
 				uvs[uv_i] = packUnorm2x16(f32_uv);
 			}
-    }
+		}
 	}
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());

From 68a689cbbaefb3210a5715940fae0d82f38f47b7 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sun, 29 Jun 2025 15:21:20 +0700
Subject: [PATCH 06/40] Fix normal and uv type

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 88e93e4d2c..0a5dd5920e 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -210,8 +210,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
 	// Create indices
-	using index_t = uint32_t;
 	{
+		using index_t = uint32_t;
 		const auto indexCount = (polyCountX * polyCountY) * 6;
 		const auto bytesize = sizeof(index_t) * indexCount;
 		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
@@ -293,8 +293,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	hlsl::vector<uint8_t, 4>* normals;
-	hlsl::vector<uint8_t, 2>* uvs;
+	hlsl::vector<int8_t, 4>* normals;
+	hlsl::vector<uint16_t, 2>* uvs;
 	{
 		{
 			constexpr auto AttrSize = sizeof(decltype(*positions));
@@ -342,12 +342,12 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 			constexpr auto AttrSize = sizeof(decltype(*uvs));
 			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
 			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4, uint8_t> aabb;
+			shapes::AABB<4, uint16_t> aabb;
 			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
 			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
 			retval->getAuxAttributeViews()->push_back({
 				.composed = {
-					.encodedDataRange = {.u8=aabb},
+					.encodedDataRange = {.u16=aabb},
 					.stride = AttrSize,
 					.format = EF_R8G8_UNORM,
 					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
@@ -492,8 +492,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	hlsl::vector<uint8_t, 4>* normals;
-	hlsl::vector<uint8_t, 2>* uvs;
+	hlsl::vector<int8_t, 4>* normals;
+	hlsl::vector<uint16_t, 2>* uvs;
 	{
 		{
 			constexpr auto AttrSize = sizeof(decltype(*positions));
@@ -632,7 +632,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	hlsl::vector<uint8_t, 4>* normals;
+	hlsl::vector<int8_t, 4>* normals;
 	{
 		{
 			constexpr auto AttrSize = sizeof(decltype(*positions));

From ca7f1822540a71b4ddfa8ce5d4b38d5bd481a3c1 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sun, 29 Jun 2025 15:21:51 +0700
Subject: [PATCH 07/40] Return nullptr if vertexCount overflows

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 0a5dd5920e..fcd31041c4 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -449,8 +449,11 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
-	const uint16_t halfIx = static_cast<uint16_t>(tesselation);
-	const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
+	const auto halfIx = static_cast<uint16_t>(tesselation);
+	const uint32_t u32_vertexCount = 2 * tesselation;
+	if (u32_vertexCount > std::numeric_limits<uint16_t>::max())
+		return nullptr;
+	const auto vertexCount = static_cast<uint16_t>(u32_vertexCount);
 
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
@@ -571,11 +574,11 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 		positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));
-		uvs[i] = { f_i * tesselationRec, 0.0 };
+		uvs[i] = { packSnorm(f_i * tesselationRec), packSnorm(0.0) };
 
 		positions[i + halfIx] = { p.x, p.y, length };
 		normals[i + halfIx] = normals[i];
-		uvs[i + halfIx] = { 1.0f, 0.0f };
+		uvs[i + halfIx] = { packSnorm(1.0f), packSnorm(0.0f) };
 	}
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
@@ -591,7 +594,10 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 
 	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
-	const uint16_t vertexCount = 2 * static_cast<uint16_t>(tesselation);
+	const uint32_t u32_vertexCount = 2 * tesselation;
+	if (u32_vertexCount > std::numeric_limits<uint16_t>::max())
+		return nullptr;
+	const auto vertexCount = static_cast<uint16_t>(u32_vertexCount);
 
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());

From a2b7b04a9540fb2ca38620c24744174881062d35 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Tue, 1 Jul 2025 18:09:08 +0700
Subject: [PATCH 08/40] Remove simd vector from normal quantization cache

---
 include/nbl/asset/utils/CDirQuantCacheBase.h | 111 ++++++++++---------
 include/nbl/asset/utils/CQuantNormalCache.h  |   5 +-
 2 files changed, 58 insertions(+), 58 deletions(-)

diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
index 462d414a73..a0e656d50e 100644
--- a/include/nbl/asset/utils/CDirQuantCacheBase.h
+++ b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -43,13 +43,13 @@ class CDirQuantCacheBase
 				
 				Vector8u3() : x(0u),y(0u),z(0u) {}
 				Vector8u3(const Vector8u3&) = default;
-				explicit Vector8u3(const core::vectorSIMDu32& val)
+				explicit Vector8u3(const hlsl::float32_t3& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u3& operator=(const Vector8u3&) = default;
-				Vector8u3& operator=(const core::vectorSIMDu32& val)
+				Vector8u3& operator=(const hlsl::float32_t3& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -57,11 +57,12 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+        hlsl::float32_t3 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z);
+					return { x, y, z };
 				}
 
+
 			private:
 				uint8_t x;
 				uint8_t y;
@@ -74,24 +75,24 @@ class CDirQuantCacheBase
 				
 				Vector8u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector8u4(const Vector8u4&) = default;
-				explicit Vector8u4(const core::vectorSIMDu32& val)
+				explicit Vector8u4(const hlsl::float32_t3& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u4& operator=(const Vector8u4&) = default;
-				Vector8u4& operator=(const core::vectorSIMDu32& val)
+				Vector8u4& operator=(const hlsl::float32_t3& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
-					w = val.w;
+					w = 0;
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+        hlsl::float32_t3 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z,w);
+					return { x, y, z };
 				}
 				
 			private:
@@ -108,16 +109,17 @@ class CDirQuantCacheBase
 
 				Vector1010102() : storage(0u) {}
 				Vector1010102(const Vector1010102&) = default;
-				explicit Vector1010102(const core::vectorSIMDu32& val)
+				explicit Vector1010102(const hlsl::float32_t3& val)
 				{
 					operator=(val);
 				}
 
 				Vector1010102& operator=(const Vector1010102&) = default;
-				Vector1010102& operator=(const core::vectorSIMDu32& val)
+				Vector1010102& operator=(const hlsl::float32_t3& val)
 				{
-					constexpr auto storageBits = quantizationBits+1u;
-					storage = val.x|(val.y<<storageBits)|(val.z<<(storageBits*2u));
+					constexpr auto storageBits = quantizationBits + 1u;
+					hlsl::uint32_t3 u32_val = { val.x, val.y, val.z };
+					storage = u32_val.x | (u32_val.y << storageBits) | (u32_val.z << (storageBits * 2u));
 					return *this;
 				}
 
@@ -130,13 +132,13 @@ class CDirQuantCacheBase
 					return storage==other.storage;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+        hlsl::float32_t3 getValue() const
 				{
-					constexpr auto storageBits = quantizationBits+1u;
-					const core::vectorSIMDu32 mask((0x1u<<storageBits)-1u);
-					return core::vectorSIMDu32(storage,storage>>storageBits,storage>>(storageBits*2u))&mask;
+					constexpr auto storageBits = quantizationBits + 1u;
+					const auto mask = (0x1u << storageBits) - 1u;
+					return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask};
 				}
-				
+
 			private:
 				uint32_t storage;
 		};
@@ -149,13 +151,13 @@ class CDirQuantCacheBase
 				
 				Vector16u3() : x(0u),y(0u),z(0u) {}
 				Vector16u3(const Vector16u3&) = default;
-				explicit Vector16u3(const core::vectorSIMDu32& val)
+				explicit Vector16u3(const hlsl::float32_t3& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u3& operator=(const Vector16u3&) = default;
-				Vector16u3& operator=(const core::vectorSIMDu32& val)
+				Vector16u3& operator=(const hlsl::float32_t3& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -163,11 +165,11 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+        hlsl::float32_t3 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z);
+					return { x, y, z };
 				}
-				
+
 			private:
 				uint16_t x;
 				uint16_t y;
@@ -180,26 +182,26 @@ class CDirQuantCacheBase
 
 				Vector16u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector16u4(const Vector16u4&) = default;
-				explicit Vector16u4(const core::vectorSIMDu32& val)
+				explicit Vector16u4(const hlsl::float32_t3& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u4& operator=(const Vector16u4&) = default;
-				Vector16u4& operator=(const core::vectorSIMDu32& val)
+				Vector16u4& operator=(const hlsl::float32_t3& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
-					w = val.w;
+					w = 0;
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+        hlsl::float32_t3 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z,w);
+					return { x, y, z };
 				}
-				
+
 			private:
 				uint16_t x;
 				uint16_t y;
@@ -377,11 +379,11 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 		std::tuple<cache_type_t<Formats>...> cache;
 		
 		template<uint32_t dimensions, E_FORMAT CacheFormat>
-		value_type_t<CacheFormat> quantize(const core::vectorSIMDf& value)
+		value_type_t<CacheFormat> quantize(const hlsl::float32_t3& value)
 		{
-			const auto negativeMask = value < core::vectorSIMDf(0.0f);
+			const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f));
 
-			const core::vectorSIMDf absValue = abs(value);
+			const hlsl::float32_t3 absValue = abs(value);
 			const auto key = Key(absValue);
 
 			constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -393,32 +395,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 					quantized = found->second;
 				else
 				{
-					const core::vectorSIMDf fit = findBestFit<dimensions,quantizationBits>(absValue);
+					const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
 
-					quantized = core::vectorSIMDu32(core::abs(fit));
+					quantized = abs(fit);
 					insertIntoCache<CacheFormat>(key,quantized);
 				}
 			}
 
-			const core::vectorSIMDu32 xorflag((0x1u<<(quantizationBits+1u))-1u);
-			auto restoredAsVec = quantized.getValue()^core::mix(core::vectorSIMDu32(0u),xorflag,negativeMask);
-			restoredAsVec += core::mix(core::vectorSIMDu32(0u),core::vectorSIMDu32(1u),negativeMask);
-			return value_type_t<CacheFormat>(restoredAsVec&xorflag);
+			//return quantized.
+			const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? -1 : 1);
+      return value_type_t<CacheFormat>(negativeMulVec * quantized.getValue());
 		}
 
 		template<uint32_t dimensions, uint32_t quantizationBits>
-		static inline core::vectorSIMDf findBestFit(const core::vectorSIMDf& value)
+		static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value)
 		{
 			static_assert(dimensions>1u,"No point");
 			static_assert(dimensions<=4u,"High Dimensions are Hard!");
-			// precise normalize
-			const auto vectorForDots = value.preciseDivision(length(value));
+
+			const auto vectorForDots = hlsl::normalize(value);
 
 			//
-			core::vectorSIMDf fittingVector;
-			core::vectorSIMDf floorOffset;
+			hlsl::float32_t3 fittingVector;
+			hlsl::float32_t3 floorOffset;
 			constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
-			core::vectorSIMDf corners[cornerCount] = {};
+			hlsl::float32_t3 corners[cornerCount] = {};
 			{
 				uint32_t maxDirCompIndex = 0u;
 				for (auto i=1u; i<dimensions; i++)
@@ -430,9 +431,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))
 				{
 					_NBL_DEBUG_BREAK_IF(true);
-					return core::vectorSIMDf(0.f);
+					return hlsl::float32_t3(0.f);
 				}
-				fittingVector = value.preciseDivision(core::vectorSIMDf(maxDirectionComp));
+				fittingVector = value / maxDirectionComp;
 				floorOffset[maxDirCompIndex] = 0.499f;
 				const uint32_t localCorner[7][3] = {
 					{1,0,0},
@@ -452,12 +453,12 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				}
 			}
 
-			core::vectorSIMDf bestFit;
+			hlsl::float32_t3 bestFit;
 			float closestTo1 = -1.f;
-			auto evaluateFit = [&](const core::vectorSIMDf& newFit) -> void
+			auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void
 			{
-				auto newFitLen = core::length(newFit);
-				const float dp = core::dot<core::vectorSIMDf>(newFit,vectorForDots).preciseDivision(newFitLen)[0];
+				auto newFitLen = length(newFit);
+				const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);
 				if (dp > closestTo1)
 				{
 					closestTo1 = dp;
@@ -466,18 +467,18 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 			};
 
 			constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
-			const core::vectorSIMDf cubeHalfSizeND = core::vectorSIMDf(cubeHalfSize);
+			const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize);
 			for (uint32_t n=cubeHalfSize; n>0u; n--)
 			{
 				//we'd use float addition in the interest of speed, to increment the loop
 				//but adding a small number to a large one loses precision, so multiplication preferrable
-				core::vectorSIMDf bottomFit = core::floor(fittingVector*float(n)+floorOffset);
-				if ((bottomFit<=cubeHalfSizeND).all())
+				const auto bottomFit = floor(fittingVector * float(n) + floorOffset);
+				if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND)))
 					evaluateFit(bottomFit);
-				for (auto i=0u; i<cornerCount; i++)
+				for (auto i = 0u; i < cornerCount; i++)
 				{
 					auto bottomFitTmp = bottomFit+corners[i];
-					if ((bottomFitTmp<=cubeHalfSizeND).all())
+					if (hlsl::all(glm::lessThanEqual(bottomFitTmp, cubeHalfSizeND)))
 						evaluateFit(bottomFitTmp);
 				}
 			}
diff --git a/include/nbl/asset/utils/CQuantNormalCache.h b/include/nbl/asset/utils/CQuantNormalCache.h
index 92703d9d37..31b7d403d2 100644
--- a/include/nbl/asset/utils/CQuantNormalCache.h
+++ b/include/nbl/asset/utils/CQuantNormalCache.h
@@ -19,7 +19,7 @@ namespace impl
 
 struct VectorUV
 {
-	inline VectorUV(const core::vectorSIMDf& absNormal)
+	inline VectorUV(const hlsl::float32_t3& absNormal)
 	{
 		const float rcpManhattanNorm = 1.f / (absNormal.x + absNormal.y + absNormal.z);
 		u = absNormal.x * rcpManhattanNorm;
@@ -56,9 +56,8 @@ class CQuantNormalCache : public CDirQuantCacheBase<impl::VectorUV,impl::QuantNo
 
 	public:
 		template<E_FORMAT CacheFormat>
-		value_type_t<CacheFormat> quantize(core::vectorSIMDf normal)
+		value_type_t<CacheFormat> quantize(hlsl::float32_t3 normal)
 		{
-			normal.makeSafe3D();
 			return Base::quantize<3u,CacheFormat>(normal);
 		}
 };

From 83f39d31c869f84049fb58e839c014f3a857c03e Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Tue, 1 Jul 2025 18:55:29 +0700
Subject: [PATCH 09/40] Fix SBufferRange to SBufferBinding conversion
 requirement

---
 include/nbl/asset/IBuffer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h
index 8c3b8f95ef..6f8c1bb35b 100644
--- a/include/nbl/asset/IBuffer.h
+++ b/include/nbl/asset/IBuffer.h
@@ -100,7 +100,7 @@ struct SBufferRange
 	inline operator SBufferRange<const BufferType>&() {return *reinterpret_cast<SBufferRange<const BufferType>*>(this);}
 	inline operator const SBufferRange<const BufferType>&() const {return *reinterpret_cast<const SBufferRange<const BufferType>*>(this);}
 
-	template<typename BT> requires std::is_same_v<std::remove_const_t<BT>,BufferType>
+	template<typename BT> requires (std::is_const_v<BT> && std::is_base_of_v<IBuffer,std::remove_const_t<BT>>)
 	inline operator SBufferBinding<BT>() const { return {.offset=offset,.buffer=buffer}; }
 
 	explicit inline operator bool() const {return isValid();}

From 983ace987435d1a9349d282c64600b93cb8eef8e Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Tue, 1 Jul 2025 21:25:46 +0700
Subject: [PATCH 10/40] createArrow multiple geometries

---
 include/nbl/asset/utils/CGeometryCreator.h |   2 +-
 src/nbl/asset/utils/CGeometryCreator.cpp   | 186 +++------------------
 2 files changed, 23 insertions(+), 165 deletions(-)

diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h
index ca6fff1790..1852b0f033 100644
--- a/include/nbl/asset/utils/CGeometryCreator.h
+++ b/include/nbl/asset/utils/CGeometryCreator.h
@@ -58,7 +58,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param colorCone color of the cone
 		\return Generated mesh.
 		*/
-		core::smart_refctd_ptr<ICPUPolygonGeometry> createArrow(const uint32_t tesselationCylinder = 4,
+		core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> createArrow(const uint32_t tesselationCylinder = 4,
 				const uint32_t tesselationCone = 8, const float height = 1.f,
 				const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f,
 				const float widthCone = 0.3f) const;
diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index fcd31041c4..31ddbac88c 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -389,8 +389,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					static_cast<float>(cos(ay)),
 					static_cast<float>(sin(axz) * sinay));
 				// for spheres the normal is the position
-				core::vectorSIMDf normal(&pos.x);
-				normal.makeSafe3D();
+				const auto normal = pos;
 				const auto quantizedNormal = quantNormalCache->quantize<NormalCacheFormat>(normal);
 				pos *= radius;
 
@@ -399,9 +398,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 				float tu = 0.5f;
 				//if (y==0)
 				//{
-				if (normal.Y != -1.0f && normal.Y != 1.0f)
-					tu = static_cast<float>(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
-				if (normal.Z < 0.0f)
+				if (normal.y != -1.0f && normal.y != 1.0f)
+					tu = static_cast<float>(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
+				if (normal.z < 0.0f)
 					tu = 1 - tu;
 				//}
 				//else
@@ -426,14 +425,14 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		// the vertex at the top of the sphere
 		positions[vertex_i] = { 0.f, radius, 0.f };
 		uvs[vertex_i] = { 0, 63};
-		const auto quantizedTopNormal = quantNormalCache->quantize<NormalCacheFormat>(core::vectorSIMDf(0.f, 1.f, 0.f));
+		const auto quantizedTopNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::float32_t3(0.f, 1.f, 0.f));
 		memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal));
 
 		// the vertex at the bottom of the sphere
 		vertex_i++;
 		positions[vertex_i] = { 0.f, -radius, 0.f };
 		uvs[vertex_i] = { 63, 127};
-		const auto quantizedBottomNormal = quantNormalCache->quantize<NormalCacheFormat>(core::vectorSIMDf(0.f, -1.f, 0.f));
+		const auto quantizedBottomNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::float32_t3(0.f, -1.f, 0.f));
 		memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal));
 	}
 
@@ -568,9 +567,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	for (uint32_t i = 0u; i < tesselation; ++i)
 	{
 		const auto f_i = static_cast<float>(i);
-		core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
+		hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
 		p *= radius;
-		const auto n = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(p));
+		const auto n = quantNormalCache->quantize<NormalCacheFormat>(hlsl::normalize(p));
 
 		positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));
@@ -686,38 +685,38 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 
 	const float step = (2.f*core::PI<float>()) / tesselation;
 
-	const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f);
+	const hlsl::float32_t3 apexVertexCoords(oblique, length, 0.0f);
 
 	const auto apexVertexBase_i = tesselation;
 
 	for (uint32_t i = 0u; i < tesselation; i++)
 	{
-		core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f);
+		hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step));
 		v *= radius;
 
 		positions[i] = { v.x, v.y, v.z };
 		positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z };
 
-		const auto simdPosition = core::vectorSIMDf(positions[i].x, positions[i].y, positions[i].z);
-		const core::vectorSIMDf v0ToApex = apexVertexCoords - simdPosition;
+		const auto simdPosition = hlsl::float32_t3(positions[i].x, positions[i].y, positions[i].z);
+		const hlsl::float32_t3 v0ToApex = apexVertexCoords - simdPosition;
 
 		uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1;
-		core::vectorSIMDf u1 = core::vectorSIMDf(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z);
+		hlsl::float32_t3 u1 = hlsl::float32_t3(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z);
 		u1 -= simdPosition;
-		float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x);
-		u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight;
+		float angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u1)));
+		u1 = hlsl::normalize(hlsl::cross(v0ToApex, u1)) * angleWeight;
 
 		uint32_t prevVertexIndex = i == 0 ? (tesselation - 1) : i - 1;
-		core::vectorSIMDf u2 = core::vectorSIMDf(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z);
+		hlsl::float32_t3 u2 = hlsl::float32_t3(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z);
 		u2 -= simdPosition;
-		angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x);
-		u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight;
+		angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u2)));
+		u2 = hlsl::normalize(hlsl::cross(u2, v0ToApex)) * angleWeight;
 
 
-		const auto baseNormal = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(u1 + u2));
+		const auto baseNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::normalize(u1 + u2));
 		memcpy(normals + i, &baseNormal, sizeof(baseNormal));
 
-		const auto apexNormal = quantNormalCache->quantize<NormalCacheFormat>(core::normalize(u1));
+		const auto apexNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::normalize(u1));
 		memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal));
 	}
 
@@ -725,7 +724,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	return retval;
 }
 
-core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
+core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> CGeometryCreator::createArrow(
 	const uint32_t tesselationCylinder,
 	const uint32_t tesselationCone,
 	const float height,
@@ -737,137 +736,13 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 	assert(height > cylinderHeight);
 
 	using position_t = hlsl::float32_t3;
-	using normal_t = hlsl::vector<uint8_t, 4>;
-	using uv_t = hlsl::vector<uint8_t, 2>;
 
 	auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder);
 	auto cone = createCone(width1, height-cylinderHeight, tesselationCone);
 
-	auto cylinderPositions = reinterpret_cast<position_t*>(cylinder->getPositionView().src.buffer->getPointer());
 	auto conePositions = reinterpret_cast<position_t*>(cone->getPositionView().src.buffer->getPointer());
 
-	const auto cylinderNormals = reinterpret_cast<normal_t*>(cylinder->getNormalView().src.buffer->getPointer());
-	const auto coneNormals = reinterpret_cast<normal_t*>(cone->getNormalView().src.buffer->getPointer());
-
-	const auto cylinderUvs = reinterpret_cast<uv_t*>(cylinder->getAuxAttributeViews()->front().src.buffer->getPointer());
-	const auto coneUvs = reinterpret_cast<uv_t*>(cone->getAuxAttributeViews()->front().src.buffer->getPointer());
-
-	const auto cylinderIndices = cylinder->getIndexView().src.buffer->getPointer();
-	const auto coneIndices = cone->getIndexView().src.buffer->getPointer();
-
-	const auto cylinderVertexCount = cylinder->getPositionView().getElementCount();
 	const auto coneVertexCount = cone->getPositionView().getElementCount();
-	const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount;
-
-	const auto cylinderIndexCount = cylinder->getVertexReferenceCount();
-	const auto coneIndexCount = cone->getVertexReferenceCount();
-	const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount;
-
-	using namespace hlsl;
-
-	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
-	retval->setIndexing(IPolygonGeometryBase::TriangleList());
-
-	// Create indices
-	using index_t = uint16_t;
-	{
-		const auto bytesize = sizeof(index_t) * newArrowIndexCount;
-		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
-		auto arrowIndices = reinterpret_cast<uint16_t*>(indices->getPointer());
-		auto newConeIndices = (arrowIndices + cylinderIndexCount);
-
-		memcpy(arrowIndices, cylinderIndices, sizeof(uint16_t) * cylinderIndexCount);
-		memcpy(newConeIndices, coneIndices, sizeof(uint16_t) * coneIndexCount);
-
-		for (auto i = 0ull; i < coneIndexCount; ++i)
-			*(newConeIndices + i) += cylinderVertexCount;
-
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = newArrowVertexCount - 1;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
-		});
-	}
-
-	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
-
-	// Create vertex attributes with NONE usage because we have no clue how they'll be used
-	hlsl::float32_t3* positions;
-	hlsl::vector<uint8_t, 4>* normals;
-	hlsl::vector<uint8_t, 2>* uvs;
-	{
-		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
-			shapes::AABB<4, float32_t> aabb;
-			//TODO(kevyuu): Calculate arrow aabb
-			aabb.maxVx = hlsl::vector<float32_t,4>(127,127,127,0);
-			aabb.minVx = -aabb.maxVx;
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32 = aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {
-					.offset=0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
-		}
-		{
-			constexpr auto AttrSize = sizeof(decltype(*normals));
-			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
-			shapes::AABB<4, int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
-			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = NormalFormat,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff)
-				}
-			});
-		}
-		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4, uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
-		}
-	}
 	
 	for (auto i = 0ull; i < coneVertexCount; ++i)
 	{
@@ -878,25 +753,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
 		conePosition = {newPos.x, newPos.y, newPos.z};
 	}
 
-	for (auto z = 0ull; z < newArrowVertexCount; ++z)
-	{
-		if (z < cylinderVertexCount)
-		{
-			positions[z] = cylinderPositions[z];
-			normals[z] = cylinderNormals[z];
-			uvs[z] = cylinderUvs[z];
-		}
-		else
-		{
-			const auto cone_i = z - cylinderVertexCount;
-			positions[z] = conePositions[cone_i];
-			normals[z] = coneNormals[cone_i];
-			uvs[z] = { 0, 0 };
-		}
-	}
+	return {cylinder, cone};
 
-	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
-	return retval;
 }
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const

From 090dae2ac53cb2122fdf33cdc51962053a89ac39 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Wed, 2 Jul 2025 15:42:07 +0700
Subject: [PATCH 11/40] DRY findLSB

---
 include/nbl/asset/ECommonEnums.h | 287 +------------------------------
 src/nbl/asset/ECommonEnums.cpp   | 281 ++++++++++++++++++++++++++++++
 2 files changed, 285 insertions(+), 283 deletions(-)
 create mode 100644 src/nbl/asset/ECommonEnums.cpp

diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h
index c07a0ced6a..f830b270d3 100644
--- a/include/nbl/asset/ECommonEnums.h
+++ b/include/nbl/asset/ECommonEnums.h
@@ -185,292 +185,13 @@ struct SMemoryBarrier
     }
 };
 
-inline core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
-{
-    struct PerStagePreviousStages
-    {
-        public:
-            constexpr PerStagePreviousStages()
-            {
-                // set all stage to have itself as their previous stages
-                for (auto i = 0; i < std::numeric_limits<PIPELINE_STAGE_FLAGS>::digits; i++)
-                  data[i] = static_cast<PIPELINE_STAGE_FLAGS>(i);
-
-                add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
-
-                add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
-
-                // graphics primitive pipeline
-                PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT;
-                for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT})
-                {
-                    if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT)
-                      primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT;
-                    add(pipelineStage, primitivePrevStage);
-                    primitivePrevStage |= pipelineStage;
-                }
-
-
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
-
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
-                data[bitIx] |= previousStageFlags;
-            }
-
-            PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
-    };
-
-    constexpr PerStagePreviousStages bitToAccess = {};
-
-    core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
-    while (bool(stages.value))
-    {
-        const auto bitIx = hlsl::findLSB(stages);
-        retval |= bitToAccess[bitIx];
-        stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
-
-inline core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
-{
-    struct PerStageLaterStages
-    {
-        public:
-            constexpr PerStageLaterStages()
-            {
-                // set all stage to have itself as their next stages
-                for (auto i = 0; i < std::numeric_limits<PIPELINE_STAGE_FLAGS>::digits; i++)
-                  data[i] = static_cast<PIPELINE_STAGE_FLAGS>(i);
-
-                add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT);
-                add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
-
-                // graphics primitive pipeline
-                PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE;
-                const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT };
-                for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++)
-                {
-                    const auto pipelineStage = *iter;
-                    add(pipelineStage, laterStage);
-                    laterStage |= pipelineStage;
-                }
-
-                add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT);
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
-
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
-                data[bitIx] |= laterStageFlags;
-            }
-
-            PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
-    };
-
-    constexpr PerStageLaterStages bitToAccess = {};
-
-    core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
-    while (bool(stages.value))
-    {
-        const auto bitIx = hlsl::findLSB(stages);
-        retval |= bitToAccess[bitIx];
-        stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
-
-inline core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
-{
-    struct PerStageAccesses
-    {
-        public:
-            constexpr PerStageAccesses()
-            {
-                init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT);
-
-                constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT;
-                init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW);
-                init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT);
-
-                constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT;
-//                init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly?
-                
-                constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT;
-                init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW);
-                init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW);
-
-                init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT);
-                init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT);
-
-                constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS;
-                constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW);
-//                init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW);
-//                init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT);
-                constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-                init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW);
-                init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW);
-                init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT);
-
-                init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
-
-                init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW);
-                init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW);
-
-//                init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT);
-//                init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT);
-//                init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT);
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages);
 
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
-                data[bitIx] = accessFlags;
-            }
+core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages);
 
-            ACCESS_FLAGS data[32] = {};
-    };
-    constexpr PerStageAccesses bitToAccess = {};
+core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages);
 
-    // TODO: add logically later or previous stages to make sure all other accesses remain valid
-    // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically)
-
-    core::bitflag<ACCESS_FLAGS> retval = ACCESS_FLAGS::NONE;
-    while (bool(stages.value))
-    {
-        const auto bitIx = hlsl::findLSB(stages);
-        retval |= bitToAccess[bitIx];
-        stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
-
-inline core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_FLAGS> accesses)
-{
-    struct PerAccessStages
-    {
-        public:
-            constexpr PerAccessStages()
-            {
-                init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
-                init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
-
-                init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT);
-                init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS);
-
-                constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT;
-//                init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds);
-//                init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
-                
-                constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT;
-                constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;
-                init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders);
-                init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations);
-
-                init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
-                init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
-                init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT);
-                init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
-
-                init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders);
-                init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
-                init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds);
-                init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders);
-
-                init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT);
-                init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT);
-
-                init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT);
-                init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT);
-                constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT;
-                init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests);
-                init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests);
-                init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT);
-                init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
-                init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
-
-                init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
-
-//                init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
-//                init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
-//                init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
-//                init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
-//                init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
-//                init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
-
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(accessFlags));
-                data[bitIx] = stageFlags;
-            }
-
-            PIPELINE_STAGE_FLAGS data[32] = {};
-    };
-    constexpr PerAccessStages bitToStage = {};
-
-    core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
-    while (bool(accesses.value))
-    {
-        const auto bitIx = hlsl::findLSB(accesses);
-        retval |= bitToStage[bitIx];
-        accesses ^= static_cast<ACCESS_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
+core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_FLAGS> accesses);
 
 }
 
diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp
new file mode 100644
index 0000000000..0f23b9b3fc
--- /dev/null
+++ b/src/nbl/asset/ECommonEnums.cpp
@@ -0,0 +1,281 @@
+#include "nbl/asset/ECommonEnums.h"
+
+namespace nbl::asset
+{
+
+// Index of the lowest set bit of `val`; the compile-time path yields -1 for zero, the runtime path defers to hlsl::findLSB.
+constexpr static int32_t findLSB(size_t val)
+{
+	// plain `if` on purpose: inside `if constexpr` std::is_constant_evaluated() is always true, which made the runtime path dead
+	if (std::is_constant_evaluated())
+	{
+		for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
+			if ((0x1ull << ix) & val) return static_cast<int32_t>(ix);
+		return -1;
+	}
+	return hlsl::findLSB(val);
+}
+
+// Returns `stages` widened by the stages the table below registers as preceding each set stage (every stage also maps to itself).
+core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
+{
+	// compile-time table: slot N is the mask contributed when bit N of `stages` is set
+	struct PerStagePreviousStages
+	{
+		public:
+			constexpr PerStagePreviousStages()
+			{
+				// seed slot N with bit N so that a stage is part of its own result; the bound is taken from the underlying type because
+				// the unspecialised `numeric_limits<PIPELINE_STAGE_FLAGS>` reports 0 digits, and the value stored is the bit, not the index
+				for (int i = 0; i < std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits; i++)
+					data[i] = static_cast<PIPELINE_STAGE_FLAGS>(std::underlying_type_t<PIPELINE_STAGE_FLAGS>(1) << i);
+
+				add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
+				add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
+
+				// graphics primitive pipeline: walk the stages in order, each one receiving everything accumulated before it
+				PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT;
+				for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT})
+				{
+					if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT)
+						primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT;
+					add(pipelineStage, primitivePrevStage);
+					primitivePrevStage |= pipelineStage;
+				}
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+			// ORs `previousStageFlags` into the slot selected by the lowest set bit of `stageFlag`
+			constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags)
+			{
+				const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
+				data[bitIx] |= previousStageFlags;
+			}
+
+			PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
+	};
+
+	constexpr PerStagePreviousStages bitToPrevious = {};
+
+	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
+	while (bool(stages.value))
+	{
+		const auto bitIx = findLSB(static_cast<size_t>(stages.value));
+		retval |= bitToPrevious[bitIx];
+		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx); // clear the bit that was just handled
+	}
+
+	return retval;
+}
+
+// Returns `stages` widened by the stages the table below registers as following each set stage (every stage also maps to itself).
+core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
+{
+	struct PerStageLaterStages
+	{
+		public:
+			constexpr PerStageLaterStages()
+			{
+				// seed slot N with bit N (bit, not index); bound from the underlying type since the unspecialised `numeric_limits` of the enum reports 0 digits
+				for (int i = 0; i < std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits; i++)
+					data[i] = static_cast<PIPELINE_STAGE_FLAGS>(std::underlying_type_t<PIPELINE_STAGE_FLAGS>(1) << i);
+
+				add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT);
+				add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
+
+				// graphics primitive pipeline: walk the order back to front, each stage receiving everything accumulated after it
+				PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE;
+				const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT };
+				for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++)
+				{
+					const auto pipelineStage = *iter;
+					add(pipelineStage, laterStage);
+					laterStage |= pipelineStage;
+				}
+				add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); // NOTE(review): stages after EARLY_FRAGMENT_TESTS are not listed here - confirm intended
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+			// ORs `laterStageFlags` into the slot selected by the lowest set bit of `stageFlag`
+			constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags)
+			{
+				const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
+				data[bitIx] |= laterStageFlags;
+			}
+
+			PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
+	};
+
+	constexpr PerStageLaterStages bitToLater = {};
+
+	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
+	while (bool(stages.value))
+	{
+		const auto bitIx = findLSB(static_cast<size_t>(stages.value));
+		retval |= bitToLater[bitIx];
+		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx); // clear the bit that was just handled
+	}
+
+	return retval;
+}
+
+// Collects, for every stage bit set in `stages`, the access flags the table below assigns to that stage.
+core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
+{
+	// constexpr lookup table, one slot per stage bit position
+	struct PerStageAccesses
+	{
+		public:
+			constexpr PerStageAccesses()
+			{
+				init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT);
+
+				constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT;
+				init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW);
+				init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT);
+
+				constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT;
+				// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly?
+
+				constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT;
+				init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW);
+				init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW);
+
+				init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT);
+				init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT);
+				init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT);
+
+				constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS;
+				constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
+				init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW);
+				init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT);
+				init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT);
+				init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW);
+				init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW);
+				init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW);
+				init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW);
+				// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW);
+				// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW);
+				init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT);
+				init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT);
+				constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+				init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW);
+				init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT);
+				init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW);
+				init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT);
+
+				init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
+
+				init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW);
+				init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW);
+
+				// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT);
+				// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT);
+				// init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT);
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+			// overwrites the slot selected by the lowest set bit of `stage` with `accesses`
+			constexpr void init(PIPELINE_STAGE_FLAGS stage, ACCESS_FLAGS accesses)
+			{
+				data[findLSB(static_cast<size_t>(stage))] = accesses;
+			}
+
+			ACCESS_FLAGS data[32] = {};
+	};
+	constexpr PerStageAccesses stageAccesses = {};
+
+	// TODO: only the stages passed in are looked up; logically earlier/later stages are not added here,
+	// so expand the mask before calling if their accesses need to be covered as well
+	core::bitflag<ACCESS_FLAGS> collected = ACCESS_FLAGS::NONE;
+	while (bool(stages.value))
+	{
+		const auto stageBit = findLSB(static_cast<size_t>(stages.value));
+		collected |= stageAccesses[stageBit];
+		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<stageBit);
+	}
+
+	return collected;
+}
+
+// Collects, for every access bit set in `accesses`, the pipeline stages the table below assigns to that access type.
+core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_FLAGS> accesses)
+{
+	struct PerAccessStages
+	{
+		public:
+			constexpr PerAccessStages()
+			{
+				init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
+				init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
+
+				init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT);
+				init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS);
+
+				constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT;
+				// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds);
+				// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
+
+				constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT;
+				constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;
+				init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders);
+				init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations);
+
+				init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
+				init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
+				init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT);
+				init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
+
+				init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders);
+				init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
+				init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds);
+				init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders);
+
+				init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT);
+				init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT);
+
+				init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT);
+				init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT);
+				constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT;
+				init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests);
+				init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests);
+				init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT);
+				init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
+				init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
+
+				init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
+
+				// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
+				// init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
+				// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
+				// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
+				// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
+				// init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+			// overwrites the slot selected by the lowest set bit of `access` with `stages`
+			constexpr void init(ACCESS_FLAGS access, PIPELINE_STAGE_FLAGS stages)
+			{
+				data[findLSB(static_cast<size_t>(access))] = stages;
+			}
+
+			PIPELINE_STAGE_FLAGS data[32] = {};
+	};
+	constexpr PerAccessStages accessStages = {};
+
+	core::bitflag<PIPELINE_STAGE_FLAGS> collected = PIPELINE_STAGE_FLAGS::NONE;
+	while (bool(accesses.value))
+	{
+		const auto accessBit = findLSB(static_cast<size_t>(accesses.value));
+		collected |= accessStages[accessBit];
+		accesses ^= static_cast<ACCESS_FLAGS>(0x1u<<accessBit);
+	}
+	return collected;
+}
+}
+

From d36687f766de0d76eb19f3c2d973dae1de699306 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Thu, 3 Jul 2025 21:03:10 +0700
Subject: [PATCH 12/40] Add missing ECommonEnums.cpp to CMakeLists.txt

---
 src/nbl/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt
index cd9572daa5..7819ca830c 100755
--- a/src/nbl/CMakeLists.txt
+++ b/src/nbl/CMakeLists.txt
@@ -151,6 +151,7 @@ set(NBL_UI_SOURCES
 )
 set(NBL_ASSET_SOURCES	
 # Assets
+	${NBL_ROOT_PATH}/src/nbl/asset/ECommonEnums.cpp
 	${NBL_ROOT_PATH}/src/nbl/asset/IAsset.cpp
 	${NBL_ROOT_PATH}/src/nbl/asset/IRenderpass.cpp
 	${NBL_ROOT_PATH}/src/nbl/asset/IAssetManager.cpp

From d92b274d9979fe78f389c1e5b065e1bf739ded6b Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Mon, 14 Jul 2025 11:46:08 +0700
Subject: [PATCH 13/40] CPU ray tracing pipeline asset conversion

---
 include/nbl/asset/ICPURayTracingPipeline.h    |  38 +-
 include/nbl/video/asset_traits.h              |  16 +
 include/nbl/video/utilities/CAssetConverter.h |   2 +
 src/nbl/asset/utils/IShaderCompiler.cpp       |   4 +-
 src/nbl/video/utilities/CAssetConverter.cpp   | 364 ++++++++++++++----
 5 files changed, 332 insertions(+), 92 deletions(-)

diff --git a/include/nbl/asset/ICPURayTracingPipeline.h b/include/nbl/asset/ICPURayTracingPipeline.h
index 17c53557e1..2776939fad 100644
--- a/include/nbl/asset/ICPURayTracingPipeline.h
+++ b/include/nbl/asset/ICPURayTracingPipeline.h
@@ -25,14 +25,12 @@ class ICPURayTracingPipeline final : public ICPUPipeline<IRayTracingPipeline<ICP
             core::vector<SShaderSpecInfo> intersections;
         };
 
-        static core::smart_refctd_ptr<ICPURayTracingPipeline> create(const ICPUPipelineLayout* layout)
+        static core::smart_refctd_ptr<ICPURayTracingPipeline> create(ICPUPipelineLayout* layout)
         {
             auto retval = new ICPURayTracingPipeline(layout);
             return core::smart_refctd_ptr<ICPURayTracingPipeline>(retval,core::dont_grab);
         }
 
-        
-
         constexpr static inline auto AssetType = ET_RAYTRACING_PIPELINE;
         inline E_TYPE getAssetType() const override { return AssetType; }
         
@@ -83,12 +81,13 @@ class ICPURayTracingPipeline final : public ICPUPipeline<IRayTracingPipeline<ICP
             return nullptr;
         }
 
-
         inline bool valid() const override final
         {
             if (!m_layout) return false;
             if (!m_layout->valid()) return false;
             if (m_raygen.valid() == SShaderSpecInfo::INVALID_SPEC_INFO) return false;
+            if (m_hitGroups.anyHits.size() != m_hitGroups.closestHits.size()) return false;
+            if (m_hitGroups.anyHits.size() != m_hitGroups.intersections.size()) return false;
             return true;
         }
 
@@ -102,7 +101,23 @@ class ICPURayTracingPipeline final : public ICPUPipeline<IRayTracingPipeline<ICP
             return m_params;
         }
 
+        inline uint32_t getMissGroupCount() const
+        {
+            return m_misses.size();
+        }
+
+        inline uint32_t getHitGroupCount() const
+        {
+            return m_hitGroups.anyHits.size();
+        }
+
+        inline uint32_t getCallableGroupCount() const
+        {
+            return m_callables.size();
+        }
+
     protected:
+        using base_t::base_t;
         virtual ~ICPURayTracingPipeline() = default;
 
     private:
@@ -112,18 +127,19 @@ class ICPURayTracingPipeline final : public ICPUPipeline<IRayTracingPipeline<ICP
         SHitGroupSpecInfos m_hitGroups;
         core::vector<SShaderSpecInfo> m_callables;
 
-        explicit ICPURayTracingPipeline(const ICPUPipelineLayout* layout)
+        explicit ICPURayTracingPipeline(ICPUPipelineLayout* layout)
             : base_t(layout, {})
             {}
 
         inline void visitDependents_impl(std::function<bool(const IAsset*)> visit) const override
         {
-            if (!visit(m_raygen.shader.get()) return;
-            for (const auto& missInfo : self->m_misses) if (!visit(missInfo.shader.get())) return;
-            for (const auto& anyHitInfo : self->m_hitGroups.anyHits) if (!visit(anyHitInfo.shader.get())) return;
-            for (const auto& closestHitInfo : self->m_hitGroups.closestHits) if (!visit(closestHitInfo.shader.get())) return;
-            for (const auto& intersectionInfo : self->m_hitGroups.intersections) if (!visit(intersectionInfo.shader.get())) return;
-            for (const auto& callableInfo : self->m_callables) if(!visit(callableInfo.shader.get())) return;
+            if (!visit(m_layout.get())) return;
+            if (!visit(m_raygen.shader.get())) return;
+            for (const auto& missInfo : m_misses) if (!visit(missInfo.shader.get())) return;
+            for (const auto& anyHitInfo : m_hitGroups.anyHits) if (!visit(anyHitInfo.shader.get())) return;
+            for (const auto& closestHitInfo : m_hitGroups.closestHits) if (!visit(closestHitInfo.shader.get())) return;
+            for (const auto& intersectionInfo : m_hitGroups.intersections) if (!visit(intersectionInfo.shader.get())) return;
+            for (const auto& callableInfo : m_callables) if(!visit(callableInfo.shader.get())) return;
         }
 
         inline core::smart_refctd_ptr<base_t> clone_impl(core::smart_refctd_ptr<ICPUPipelineLayout>&& layout, uint32_t depth) const override final
diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h
index faf5322798..c4a6c25ca5 100644
--- a/include/nbl/video/asset_traits.h
+++ b/include/nbl/video/asset_traits.h
@@ -21,6 +21,8 @@
 #include "nbl/video/IGPUAccelerationStructure.h"
 #include "nbl/asset/ICPUPolygonGeometry.h"
 #include "nbl/video/IGPUPolygonGeometry.h"
+#include "nbl/asset/ICPURayTracingPipeline.h"
+#include "nbl/video/IGPURayTracingPipeline.h"
 
 
 namespace nbl::video
@@ -244,6 +246,20 @@ struct asset_traits<asset::ICPUPolygonGeometry>
 };
 
 
+template<>
+struct asset_traits<asset::ICPURayTracingPipeline>
+{
+	// the asset type
+	using asset_t = asset::ICPURayTracingPipeline;
+	// Depends on shader and layout
+	constexpr static inline bool HasChildren = true;
+	// the video type
+	using video_t = IGPURayTracingPipeline;
+	// lookup type
+	using lookup_t = const video_t*;
+};
+
+
 /* TODO
 template<>
 struct asset_traits<asset::ICPUFramebuffer>;
diff --git a/include/nbl/video/utilities/CAssetConverter.h b/include/nbl/video/utilities/CAssetConverter.h
index 3f0225a78e..a360e3b0f5 100644
--- a/include/nbl/video/utilities/CAssetConverter.h
+++ b/include/nbl/video/utilities/CAssetConverter.h
@@ -48,6 +48,7 @@ class CAssetConverter : public core::IReferenceCounted
 			asset::ICPUPipelineLayout,
 			asset::ICPUPipelineCache,
 			asset::ICPUComputePipeline,
+			asset::ICPURayTracingPipeline,
 			asset::ICPURenderpass,
 			asset::ICPUGraphicsPipeline,
 			asset::ICPUDescriptorSet,
@@ -690,6 +691,7 @@ class CAssetConverter : public core::IReferenceCounted
 					bool operator()(lookup_t<asset::ICPUPipelineLayout>);
 					bool operator()(lookup_t<asset::ICPUPipelineCache>);
 					bool operator()(lookup_t<asset::ICPUComputePipeline>);
+					bool operator()(lookup_t<asset::ICPURayTracingPipeline>);
 					bool operator()(lookup_t<asset::ICPURenderpass>);
 					bool operator()(lookup_t<asset::ICPUGraphicsPipeline>);
 					bool operator()(lookup_t<asset::ICPUDescriptorSet>);
diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp
index 5cfd36eced..e60bf31b5c 100644
--- a/src/nbl/asset/utils/IShaderCompiler.cpp
+++ b/src/nbl/asset/utils/IShaderCompiler.cpp
@@ -342,7 +342,7 @@ core::smart_refctd_ptr<ICPUBuffer> IShaderCompiler::CCache::serialize() const
     memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + shaderBufferSize, dumpedContainerJson.data(), dumpedContainerJsonLength);
 
     auto memoryResource = core::make_smart_refctd_ptr<core::adoption_memory_resource<decltype(retVal)>>(std::move(retVal));
-    return ICPUBuffer::create({ { retValSize }, memoryResource->getBacker().data(),std::move(memoryResource)});
+    return ICPUBuffer::create({ { retValSize }, memoryResource->getBacker().data(),std::move(memoryResource)}, core::adopt_memory);
 }
 
 core::smart_refctd_ptr<IShaderCompiler::CCache> IShaderCompiler::CCache::deserialize(const std::span<const uint8_t> serializedCache)
@@ -416,7 +416,7 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu
     compressedSpirv.resize(propsSize + destLen);
 
     auto memoryResource = core::make_smart_refctd_ptr<core::adoption_memory_resource<decltype(compressedSpirv)>>(std::move(compressedSpirv));
-    spirv = ICPUBuffer::create({ { propsSize + destLen }, memoryResource->getBacker().data(),std::move(memoryResource)});
+    spirv = ICPUBuffer::create({ { propsSize + destLen }, memoryResource->getBacker().data(),std::move(memoryResource)}, core::adopt_memory);
 
     return true;
 }
diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index d004660e42..3980a7a3a4 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -543,6 +543,31 @@ class AssetVisitor : public CRTP
 				return false;
 			return true;
 		}
+		inline bool impl(const instance_t<ICPURayTracingPipeline>& instance, const CAssetConverter::patch_t<ICPURayTracingPipeline>& userPatch)
+		{
+			const auto* asset = instance.asset;
+			const auto* layout = asset->getLayout();
+			if (!layout || !descend(layout,{layout})) // a ray tracing pipeline without a layout cannot be visited
+				return false;
+			using stage_t = hlsl::ShaderStage;
+			for (stage_t stage : {hlsl::ShaderStage::ESS_RAYGEN, hlsl::ShaderStage::ESS_MISS, hlsl::ShaderStage::ESS_ANY_HIT, hlsl::ShaderStage::ESS_CLOSEST_HIT, hlsl::ShaderStage::ESS_INTERSECTION, hlsl::ShaderStage::ESS_CALLABLE})
+			{
+				const auto& specInfos = asset->getSpecInfos(stage);
+				for (uint32_t specInfo_i = 0u; specInfo_i < specInfos.size(); specInfo_i++) // unsigned index: compared against size() and forwarded as the group index
+				{
+					const auto& specInfo = specInfos[specInfo_i];
+					const auto* shader = specInfo.shader.get();
+					if (!shader)
+					{
+						if (stage == stage_t::ESS_RAYGEN) return false; // only the raygen shader is mandatory
+						CRTP::template nullOptional<IShader>(); // every other stage may legitimately have a null shader in a group
+						continue;
+					}
+					if (!descend(shader,{shader}, specInfo, stage, specInfo_i)) return false;
+				}
+			}
+			return true;
+		}
 		inline bool impl(const instance_t<ICPUGraphicsPipeline>& instance, const CAssetConverter::patch_t<ICPUGraphicsPipeline>& userPatch)
 		{
 			const auto* asset = instance.asset;
@@ -1370,6 +1395,25 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPUComputePipe
 	hasher << params.requireFullSubgroups;
 	return true;
 }
+bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPURayTracingPipeline> lookup)
+{
+	const auto* asset = lookup.asset;
+	// hash the shader group counts up front, then let the visitor walk the layout and the per-stage shaders
+	hasher << asset->getMissGroupCount();
+	hasher << asset->getHitGroupCount();
+	hasher << asset->getCallableGroupCount();
+	AssetVisitor<HashVisit<ICPURayTracingPipeline>> visitor = {
+		*this,
+		{asset,static_cast<const PatchOverride*>(patchOverride)->uniqueCopyGroupID},
+		*lookup.patch
+	};
+	if (!visitor()) // a failed visit means no hash can be produced
+		return false;
+	const auto& params = asset->getCachedCreationParams(); // cached creation parameters are hashed last
+	hasher << params.maxRecursionDepth;
+	hasher << params.dynamicStackSize;
+	return true;
+}
 bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t<ICPURenderpass> lookup)
 {
 	const auto* asset = lookup.asset;
@@ -1688,6 +1732,7 @@ void CAssetConverter::CHashCache::eraseStale(const IPatchOverride* patchOverride
 	rehash.operator()<IShader>();
 	rehash.operator()<ICPUPipelineCache>();
 	rehash.operator()<ICPUComputePipeline>();
+	rehash.template operator()<ICPURayTracingPipeline>();
 	// graphics pipeline needs a renderpass
 	rehash.template operator()<ICPURenderpass>();
 	rehash.template operator()<ICPUGraphicsPipeline>();
@@ -2041,103 +2086,184 @@ class GetDependantVisit<ICPUDescriptorSet> : public GetDependantVisitBase<ICPUDe
 				storageOffset.data += element;
 				potentialTLASRewrites.push_back(storageOffset);
 			}
-			if constexpr (std::is_same_v<DepType,ICPUImageView>)
+			if constexpr (std::is_same_v<DepType, ICPUImageView>)
 			{
 				outInfo.info.image.imageLayout = std::get<0>(argTuple);
-				if (type==IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)
+				if (type == IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)
 				{
 					assert(lastCombinedSampler);
 					outInfo.info.combinedImageSampler.sampler = smart_refctd_ptr<IGPUSampler>(lastCombinedSampler);
-					lastCombinedSampler = nullptr; // for debuggability
+					lastCombinedSampler = nullptr;
 				}
 			}
 			outInfo.desc = std::move(depObj);
 			return true;
-		}
-};
-template<>
-class GetDependantVisit<ICPUPolygonGeometry> : public GetDependantVisitBase<ICPUPolygonGeometry>
+    }
+  };
+  template<>
+  class GetDependantVisit<ICPUPolygonGeometry> : public GetDependantVisitBase<ICPUPolygonGeometry>
+  {
+    public:
+      bool finalize()
+      {
+        if (!creationParams.indexing)
+          return false;
+        creationParams.jointWeightViews = jointWeightViews;
+        creationParams.auxAttributeViews = auxAttributeViews;
+        return true;
+      }
+
+      IGPUPolygonGeometry::SCreationParams creationParams = {};
+      // has to be public because of aggregate init, but its only for internal usage!
+      core::vector<IGPUPolygonGeometry::SJointWeight> jointWeightViews = {};
+      core::vector<IGPUPolygonGeometry::SDataView> auxAttributeViews = {};
+
+    protected:
+      bool descend_impl(
+        const instance_t<AssetType>& user, const CAssetConverter::patch_t<AssetType>& userPatch,
+        const instance_t<ICPUBuffer>& dep, const CAssetConverter::patch_t<ICPUBuffer>& soloPatch,
+        const EPolygonGeometryViewType type, const uint32_t index
+      )
+      {
+        auto depObj = getDependant<ICPUBuffer>(dep,soloPatch);
+        if (!depObj)
+          return false;
+        const auto* asset = user.asset;
+        switch (type)
+        {
+          case EPolygonGeometryViewType::Position:
+            // obligatory attribute, handle basic setup here too
+            creationParams.indexing = asset->getIndexingCallback();
+            creationParams.aabb = asset->getAABBStorage();
+            creationParams.jointCount = asset->getJointCount();
+            creationParams.positionView = getView(asset->getPositionView(),std::move(depObj));
+            break;
+          case EPolygonGeometryViewType::Index:
+            creationParams.indexView = getView(asset->getIndexView(),std::move(depObj));
+            break;
+          case EPolygonGeometryViewType::Normal:
+            creationParams.normalView = getView(asset->getNormalView(),std::move(depObj));
+            break;
+          case EPolygonGeometryViewType::JointOBB:
+            creationParams.jointOBBView = getView(*asset->getJointOBBView(),std::move(depObj));
+            break;
+          case EPolygonGeometryViewType::JointIndices:
+            jointWeightViews.resize(index+1);
+            jointWeightViews[index].indices = getView(asset->getJointWeightViews()[index].indices,std::move(depObj));
+            break;
+          case EPolygonGeometryViewType::JointWeights:
+            jointWeightViews.resize(index+1);
+            jointWeightViews[index].weights = getView(asset->getJointWeightViews()[index].weights,std::move(depObj));
+            break;
+          case EPolygonGeometryViewType::Aux:
+            auxAttributeViews.push_back(getView(asset->getAuxAttributeViews()[index],std::move(depObj)));
+            break;
+          default:
+            return false;
+        }
+        // abuse this pointer to signal invalid state
+        return creationParams.indexing;
+      }
+
+    private:
+      IGPUPolygonGeometry::SDataView getView(const ICPUPolygonGeometry::SDataView& orig, core::smart_refctd_ptr<IGPUBuffer>&& buff)
+      {
+        IGPUPolygonGeometry::SDataView retval = {
+          .composed = orig.composed,
+          .src = {
+            .offset = orig.src.offset,
+            .size = orig.src.actualSize(),
+            .buffer = std::move(buff)
+          }
+        };
+        if (orig && !retval)
+          creationParams.indexing = nullptr;
+        return retval;
+      }
+  };
+  template<>
+class GetDependantVisit<ICPURayTracingPipeline> : public GetDependantVisitBase<ICPURayTracingPipeline>
 {
-	public:
-		bool finalize()
-		{
-			if (!creationParams.indexing)
-				return false;
-			creationParams.jointWeightViews = jointWeightViews;
-			creationParams.auxAttributeViews = auxAttributeViews;
-			return true;
-		}
+public:
 
-		IGPUPolygonGeometry::SCreationParams creationParams = {};
-		// has to be public because of aggregate init, but its only for internal usage!
-		core::vector<IGPUPolygonGeometry::SJointWeight> jointWeightViews = {};
-		core::vector<IGPUPolygonGeometry::SDataView> auxAttributeViews = {};
+	inline void allocateShaders(size_t missCount, size_t hitGroupCount, size_t callableGroupCount)
+	{
+		misses.resize(missCount);
+		hitGroups.anyHits.resize(hitGroupCount);
+		hitGroups.closestHits.resize(hitGroupCount);
+		hitGroups.intersections.resize(hitGroupCount);
+		callables.resize(callableGroupCount);
+	}
 
-	protected:
-		bool descend_impl(
-			const instance_t<AssetType>& user, const CAssetConverter::patch_t<AssetType>& userPatch,
-			const instance_t<ICPUBuffer>& dep, const CAssetConverter::patch_t<ICPUBuffer>& soloPatch,
-			const EPolygonGeometryViewType type, const uint32_t index
-		)
-		{
-			auto depObj = getDependant<ICPUBuffer>(dep,soloPatch);
-			if (!depObj)
-				return false;
-			const auto* asset = user.asset;
-			switch (type)
-			{
-				case EPolygonGeometryViewType::Position:
-					// obligatory attribute, handle basic setup here too
-					creationParams.indexing = asset->getIndexingCallback();
-					creationParams.aabb = asset->getAABBStorage();
-					creationParams.jointCount = asset->getJointCount();
-					creationParams.positionView = getView(asset->getPositionView(),std::move(depObj));
-					break;
-				case EPolygonGeometryViewType::Index:
-					creationParams.indexView = getView(asset->getIndexView(),std::move(depObj));
-					break;
-				case EPolygonGeometryViewType::Normal:
-					creationParams.normalView = getView(asset->getNormalView(),std::move(depObj));
-					break;
-				case EPolygonGeometryViewType::JointOBB:
-					creationParams.jointOBBView = getView(*asset->getJointOBBView(),std::move(depObj));
-					break;
-				case EPolygonGeometryViewType::JointIndices:
-					jointWeightViews.resize(index+1);
-					jointWeightViews[index].indices = getView(asset->getJointWeightViews()[index].indices,std::move(depObj));
-					break;
-				case EPolygonGeometryViewType::JointWeights:
-					jointWeightViews.resize(index+1);
-					jointWeightViews[index].weights = getView(asset->getJointWeightViews()[index].weights,std::move(depObj));
-					break;
-				case EPolygonGeometryViewType::Aux:
-					auxAttributeViews.push_back(getView(asset->getAuxAttributeViews()[index],std::move(depObj)));
-					break;
-				default:
-					return false;
-			}
-			// abuse this pointer to signal invalid state
-			return creationParams.indexing;
-		}
+  inline core::vector<ICPUPipelineBase::SShaderSpecInfo>* getSpecInfoVector(const hlsl::ShaderStage stage)
+  {
+    switch (stage) 
+    {
+      // raygen is not stored as vector so we can't return it here. Use getSpecInfo
+      case hlsl::ShaderStage::ESS_MISS:
+        return &misses;
+      case hlsl::ShaderStage::ESS_ANY_HIT:
+        return &hitGroups.anyHits;
+      case hlsl::ShaderStage::ESS_CLOSEST_HIT:
+        return &hitGroups.closestHits;
+      case hlsl::ShaderStage::ESS_INTERSECTION:
+        return &hitGroups.intersections;
+      case hlsl::ShaderStage::ESS_CALLABLE:
+        return &callables;
+    }
+    return nullptr;
+  }
+
+  // ok to do non owning since some cache owns anyway
+  IGPUPipelineLayout* layout = nullptr;
+  ICPUPipelineBase::SShaderSpecInfo raygen;
+  core::vector<ICPUPipelineBase::SShaderSpecInfo> misses;
+  ICPURayTracingPipeline::SHitGroupSpecInfos hitGroups;
+  core::vector<ICPUPipelineBase::SShaderSpecInfo> callables;
+
+protected:
+	bool descend_impl(
+		const instance_t<ICPURayTracingPipeline>& user, const CAssetConverter::patch_t<ICPURayTracingPipeline>& userPatch,
+		const instance_t<ICPUPipelineLayout>& dep, const CAssetConverter::patch_t<ICPUPipelineLayout>& soloPatch
+	)
+	{
+		auto depObj = getDependant<ICPUPipelineLayout>(dep, soloPatch);
+		if (!depObj)
+			return false;
+		layout = depObj.get();
+		return true;
+	}
+	bool descend_impl(
+		const instance_t<ICPURayTracingPipeline>& user, const CAssetConverter::patch_t<ICPURayTracingPipeline>& userPatch,
+		const instance_t<IShader>& dep, const CAssetConverter::patch_t<IShader>& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage, uint32_t groupIndex
+	)
+	{
+		auto depObj = getDependant<IShader>(dep, soloPatch);
 
-	private:
-		IGPUPolygonGeometry::SDataView getView(const ICPUPolygonGeometry::SDataView& orig, core::smart_refctd_ptr<IGPUBuffer>&& buff)
+		if (stage == hlsl::ShaderStage::ESS_RAYGEN)
 		{
-			IGPUPolygonGeometry::SDataView retval = {
-				.composed = orig.composed,
-				.src = {
-					.offset = orig.src.offset,
-					.size = orig.src.actualSize(),
-					.buffer = std::move(buff)
-				}
+			assert(groupIndex == 0);
+			raygen = ICPUPipelineBase::SShaderSpecInfo{
+				.shader = depObj,
+				.entryPoint = inSpecInfo.entryPoint,
+				.requiredSubgroupSize = inSpecInfo.requiredSubgroupSize,
+        .entries = inSpecInfo.entries,
 			};
-			if (orig && !retval)
-				creationParams.indexing = nullptr;
-			return retval;
+		} else
+		{
+			auto& shaderGroups = *getSpecInfoVector(stage);
+			assert(groupIndex < shaderGroups.size());
+			shaderGroups[groupIndex] = ICPUPipelineBase::SShaderSpecInfo{
+				.shader = depObj,
+				.entryPoint = inSpecInfo.entryPoint,
+				.requiredSubgroupSize = inSpecInfo.requiredSubgroupSize,
+				.entries = inSpecInfo.entries,
+      };
 		}
+		return true;
+	}
 };
 
-
 // Needed both for reservation and conversion
 class MetaDeviceMemoryAllocator final
 {
@@ -2774,6 +2900,9 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 					case ICPUGraphicsPipeline::AssetType:
 						visit.template operator()<ICPUGraphicsPipeline>(entry);
 						break;
+					case ICPURayTracingPipeline::AssetType:
+						visit.template operator()<ICPURayTracingPipeline>(entry);
+						break;
 					case ICPUDescriptorSet::AssetType:
 						visit.template operator()<ICPUDescriptorSet>(entry);
 						break;
@@ -3474,6 +3603,81 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 					}
 				}
 			}
+		  if constexpr (std::is_same_v<AssetType,ICPURayTracingPipeline>)
+			{
+				for (auto& entry : conversionRequests.contentHashToCanonical)
+				{
+					const ICPURayTracingPipeline* asset = entry.second.canonicalAsset;
+					// there is no patching possible for this asset
+					for (auto i=0ull; i<entry.second.copyCount; i++)
+					{
+						const auto outIx = i+entry.second.firstCopyIx;
+						const auto uniqueCopyGroupID = conversionRequests.gpuObjUniqueCopyGroupIDs[outIx];
+						AssetVisitor<GetDependantVisit<ICPURayTracingPipeline>> visitor = {
+							{visitBase},
+							{asset,uniqueCopyGroupID},
+							{}
+						};
+						visitor.allocateShaders(
+							asset->getMissGroupCount(),
+							asset->getHitGroupCount(), 
+							asset->getCallableGroupCount());
+						if (!visitor())
+							continue;
+						// mirrors the compute pipeline path (ILogicalDevice::createComputePipelines is rather aggressive on the spec constant validation), so we create one ray tracing pipeline at a time
+						core::smart_refctd_ptr<IGPURayTracingPipeline> ppln;
+						{
+							// no derivatives, special flags, etc.
+							IGPURayTracingPipeline::SCreationParams params = {};
+							using SShaderEntryMap = IGPUPipelineBase::SShaderEntryMap;
+							using stage_t = hlsl::ShaderStage;
+							using GPUShaderSpecInfo = IGPUPipelineBase::SShaderSpecInfo;
+
+							params.layout = visitor.layout;
+
+							SShaderEntryMap raygenEntryMap;
+							params.shaderGroups.raygen = GPUShaderSpecInfo::create(visitor.raygen, &raygenEntryMap);
+
+							struct GPUSpecEntryVec
+							{
+								core::vector<SShaderEntryMap> entryMaps;
+								core::vector<IGPUPipelineBase::SShaderSpecInfo> specs;
+
+								explicit GPUSpecEntryVec(std::span<ICPUPipelineBase::SShaderSpecInfo> cpuSpecs)
+								  : entryMaps(cpuSpecs.size()), specs(cpuSpecs.size())
+								{
+									for (auto spec_i = 0u; spec_i < cpuSpecs.size(); spec_i++)
+										specs[spec_i] = GPUShaderSpecInfo::create(cpuSpecs[spec_i], &entryMaps[spec_i]);
+								}
+							};
+
+							GPUSpecEntryVec missSpecEntry(visitor.misses);
+							params.shaderGroups.misses = missSpecEntry.specs;
+
+							GPUSpecEntryVec callableSpecEntry(visitor.callables);
+							params.shaderGroups.callables = callableSpecEntry.specs;
+
+							core::vector<IGPURayTracingPipeline::SHitGroup> hitGroups(visitor.hitGroups.closestHits.size());
+							core::vector<SShaderEntryMap> closestHitEntryMaps(visitor.hitGroups.closestHits.size());
+							core::vector<SShaderEntryMap> anyHitEntryMaps(visitor.hitGroups.anyHits.size());
+							core::vector<SShaderEntryMap> intersectionEntryMaps(visitor.hitGroups.intersections.size());
+							assert(anyHitEntryMaps.size() == closestHitEntryMaps.size());
+							assert(anyHitEntryMaps.size() == intersectionEntryMaps.size());
+							for (auto hitGroup_i = 0u ; hitGroup_i < hitGroups.size(); hitGroup_i++)
+							{
+								hitGroups[hitGroup_i].closestHit = GPUShaderSpecInfo::create(visitor.hitGroups.closestHits[hitGroup_i], &closestHitEntryMaps[hitGroup_i]);
+								hitGroups[hitGroup_i].anyHit = GPUShaderSpecInfo::create(visitor.hitGroups.anyHits[hitGroup_i], &anyHitEntryMaps[hitGroup_i]);
+								hitGroups[hitGroup_i].intersection = GPUShaderSpecInfo::create(visitor.hitGroups.intersections[hitGroup_i], &intersectionEntryMaps[hitGroup_i]);
+							}
+							params.shaderGroups.hits = hitGroups;
+
+							params.cached = asset->getCachedCreationParams();
+							device->createRayTracingPipelines(inputs.pipelineCache, {&params, 1}, &ppln);
+							conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln));
+						}
+					}
+				}
+			}
 			if constexpr (std::is_same_v<AssetType,ICPUDescriptorSet>)
 			{
 				// Why we're not grouping multiple descriptor sets into few pools and doing 1 pool per descriptor set.
@@ -3675,6 +3879,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 		dedupCreateProp.template operator()<ICPUPipelineLayout>();
 		dedupCreateProp.template operator()<ICPUPipelineCache>();
 		dedupCreateProp.template operator()<ICPUComputePipeline>();
+		dedupCreateProp.template operator()<ICPURayTracingPipeline>();
 		dedupCreateProp.template operator()<ICPURenderpass>();
 		dedupCreateProp.template operator()<ICPUGraphicsPipeline>();
 		dedupCreateProp.template operator()<ICPUDescriptorSet>();
@@ -3758,6 +3963,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 		pruneStaging.template operator()<ICPUGraphicsPipeline>();
 		pruneStaging.template operator()<ICPURenderpass>();
 		pruneStaging.template operator()<ICPUComputePipeline>();
+		pruneStaging.template operator()<ICPURayTracingPipeline>();
 		pruneStaging.template operator()<ICPUPipelineCache>();
 		pruneStaging.template operator()<ICPUPipelineLayout>();
 		pruneStaging.template operator()<ICPUDescriptorSetLayout>();

From 6d2df490b48bfdfe92061a9e4bd029439bfea0bd Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Mon, 14 Jul 2025 20:30:59 +0700
Subject: [PATCH 14/40] Add groupIndex to shader hash

---
 src/nbl/video/utilities/CAssetConverter.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index 3980a7a3a4..5bf552c639 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -1129,6 +1129,15 @@ class HashVisit : public CAssetConverter::CHashCache::hash_impl_base
 					assert(hlsl::bitCount(stage) == 1);
 					hasher << stage;
 					hasher << arg0.requiredSubgroupSize;
+					if (std::tuple_size(argTuple) >= 3)
+					{
+						const auto groupIndex = std::get<2>(argTuple);
+						hasher << groupIndex;
+					} else
+					{
+						// assume group index to be zero.
+						hasher << 0;
+					}
 					if (!arg0.entries.empty())
 					{
 					  for (const auto& specConstant : arg0.entries) 
@@ -2239,7 +2248,8 @@ class GetDependantVisit<ICPURayTracingPipeline> : public GetDependantVisitBase<I
 	)
 	{
 		auto depObj = getDependant<IShader>(dep, soloPatch);
-
+		if (!depObj)
+			return false;
 		if (stage == hlsl::ShaderStage::ESS_RAYGEN)
 		{
 			assert(groupIndex == 0);

From 9eb02276c4ef3ca1d0c392498153381c0fde51f0 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Tue, 15 Jul 2025 10:57:11 +0700
Subject: [PATCH 15/40] Small fix on rt pipeline hash computation

---
 src/nbl/video/utilities/CAssetConverter.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index 5bf552c639..29b3d291fd 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -1129,14 +1129,10 @@ class HashVisit : public CAssetConverter::CHashCache::hash_impl_base
 					assert(hlsl::bitCount(stage) == 1);
 					hasher << stage;
 					hasher << arg0.requiredSubgroupSize;
-					if (std::tuple_size(argTuple) >= 3)
+					if constexpr (std::is_same_v<AssetT, ICPURayTracingPipeline>)
 					{
 						const auto groupIndex = std::get<2>(argTuple);
 						hasher << groupIndex;
-					} else
-					{
-						// assume group index to be zero.
-						hasher << 0;
 					}
 					if (!arg0.entries.empty())
 					{

From 73ac23fce1b5d45f7ea2e12005c0490abea30c3f Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Tue, 15 Jul 2025 13:22:28 +0700
Subject: [PATCH 16/40] Automatic no null flags insertion for rt pipeline in
 asset converter

---
 src/nbl/video/utilities/CAssetConverter.cpp | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index 29b3d291fd..e04d0153be 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -3677,6 +3677,27 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 							}
 							params.shaderGroups.hits = hitGroups;
 
+							using RayTracingFlags = IGPURayTracingPipeline::SCreationParams::FLAGS;
+							const auto isNullSpecInfo = [](const ICPUPipelineBase::SShaderSpecInfo& specInfo)
+								{
+									return specInfo.shader.get() == nullptr;
+								};
+							const auto noNullMiss = std::none_of(
+								visitor.misses.begin(), 
+								visitor.misses.end(), 
+								isNullSpecInfo);
+							if (noNullMiss) params.flags |= RayTracingFlags::NO_NULL_MISS_SHADERS;
+							const auto noNullClosestHit = std::none_of(
+								visitor.hitGroups.closestHits.begin(), 
+								visitor.hitGroups.closestHits.end(),
+								isNullSpecInfo);
+							if (noNullClosestHit) params.flags |= RayTracingFlags::NO_NULL_CLOSEST_HIT_SHADERS;
+							const auto noNullAnyHit = std::none_of(
+								visitor.hitGroups.anyHits.begin(),
+								visitor.hitGroups.anyHits.end(),
+								isNullSpecInfo);
+							if (noNullAnyHit) params.flags |= RayTracingFlags::NO_NULL_ANY_HIT_SHADERS;
+
 							params.cached = asset->getCachedCreationParams();
 							device->createRayTracingPipelines(inputs.pipelineCache, {&params, 1}, &ppln);
 							conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln));

From bd3a266956401e7109b43966f0cce8211573cf56 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 19:14:58 +0700
Subject: [PATCH 17/40] Extract some common attribute view creation into its
 own function

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 726 +++++++++--------------
 1 file changed, 288 insertions(+), 438 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 05a80cf3a7..f2a9515566 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -15,11 +15,142 @@
 namespace nbl::asset
 {
 
+	namespace
+	{
+		using snorm_normal_t = hlsl::vector<int8_t, 4>;
+		constexpr int8_t snorm_one = std::numeric_limits<int8_t>::max();
+		constexpr int8_t snorm_neg_one = std::numeric_limits<int8_t>::min();
+    constexpr auto snorm_positive_x = hlsl::vector<int8_t, 4>(snorm_one, 0, 0, 0);
+    constexpr auto snorm_negative_x = hlsl::vector<int8_t, 4>(snorm_neg_one, 0, 0, 0);
+		constexpr auto snorm_positive_y = hlsl::vector<int8_t, 4>(0, snorm_one, 0, 0);
+		constexpr auto snorm_negative_y = hlsl::vector<int8_t, 4>(0, snorm_neg_one, 0, 0);
+    constexpr auto snorm_positive_z = hlsl::vector<int8_t, 4>(0, 0, snorm_one, 0);
+		constexpr auto snorm_negative_z = hlsl::vector<int8_t, 4>(0, 0, snorm_neg_one, 0);
+
+		constexpr auto snorm_all_ones = hlsl::vector<int8_t, 4>(snorm_one, snorm_one, snorm_one, snorm_one);
+
+}
+
 static uint8_t packSnorm(float val)
 {
 	return round(hlsl::clamp(val, -1.0f, 1.0f) * 127);
 }
 
+template <typename ElementT>
+  requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
+static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount)
+{
+	const auto elementCount = 2;
+	const auto attrSize = sizeof(ElementT) * elementCount;
+  auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}});
+  hlsl::shapes::AABB<4, ElementT> aabb;
+  aabb.minVx = hlsl::vector<ElementT, 4>(0,0,0,0);
+  aabb.maxVx = hlsl::vector<ElementT, 4>(std::numeric_limits<ElementT>::max(), std::numeric_limits<ElementT>::max(), 0, 0);
+
+	auto retval = ICPUPolygonGeometry::SDataView{
+		.composed = {
+      .stride = attrSize,
+		},
+		.src = {
+			.offset = 0,
+			.size = buff->getSize(),
+			.buffer = std::move(buff),
+		}
+	};
+
+	if constexpr(std::is_same_v<ElementT, uint8_t>)
+	{
+		retval.composed.encodedDataRange.u8 = aabb;
+		retval.composed.format = EF_R8G8_UNORM;
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM;
+	}
+	else if constexpr(std::is_same_v<ElementT, uint16_t>)
+	{
+		retval.composed.encodedDataRange.u16 = aabb;
+		retval.composed.format = EF_R16G16_UNORM;
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM;
+	}
+
+	return retval;
+}
+
+template <typename IndexT>
+  requires(std::is_same_v<IndexT, uint16_t> || std::is_same_v<IndexT, uint32_t>)
+static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex)
+{
+  
+  const auto bytesize = sizeof(IndexT) * indexCount;
+  auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+
+  hlsl::shapes::AABB<4,IndexT> aabb;
+  aabb.minVx[0] = 0;
+  aabb.maxVx[0] = maxIndex;
+
+	auto retval = ICPUPolygonGeometry::SDataView{
+	  .composed = {
+      .stride = sizeof(IndexT),
+    },
+    .src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)},
+	};
+
+	if constexpr(std::is_same_v<IndexT, uint16_t>)
+	{
+		retval.composed.encodedDataRange.u16 = aabb;
+		retval.composed.format = EF_R16_UINT;
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16;
+	}
+	else if constexpr(std::is_same_v<IndexT, uint32_t>)
+	{
+		retval.composed.encodedDataRange.u32 = aabb;
+		retval.composed.format = EF_R32_UINT;
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U32;
+	}
+
+	return retval;
+}
+
+template <size_t ElementCountV = 3>
+  requires(ElementCountV > 0 && ElementCountV <= 4)
+static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, const hlsl::shapes::AABB<4, hlsl::float32_t>& aabb)
+{
+	using position_t = hlsl::vector<hlsl::float32_t, ElementCountV>;
+	constexpr auto AttrSize = sizeof(position_t);
+  auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE});
+
+	constexpr auto format = []()
+	{
+    if constexpr (ElementCountV == 1) return EF_R32_SFLOAT;
+    if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT;
+    if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT;
+    if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT;
+  }();
+
+	return {
+		.composed = {
+			.encodedDataRange = {.f32 = aabb},
+			.stride = AttrSize,
+			.format = format,
+			.rangeFormat = IGeometryBase::EAABBFormat::F32
+		},
+		.src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)}
+	};
+}
+
+static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, const hlsl::shapes::AABB<4, int8_t>& aabb)
+{
+	constexpr auto AttrSize = sizeof(snorm_normal_t);
+  auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE});
+  return {
+    .composed = {
+      .encodedDataRange = {.s8=aabb},
+      .stride = AttrSize,
+      .format = EF_R8G8B8A8_SNORM,
+      .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+    },
+    .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
+  };
+}
+
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const hlsl::float32_t3 size) const
 {
 	using namespace hlsl;
@@ -27,13 +158,15 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
+	constexpr auto CubeUniqueVertices = 24;
+
 	// Create indices
 	using index_t = uint16_t;
 	{
-		constexpr auto IndexCount = 36u;
-		constexpr auto bytesize = sizeof(index_t) * IndexCount;
-		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
-		auto u = reinterpret_cast<index_t*>(indices->getPointer());
+		constexpr auto IndexCount = 36;
+		constexpr auto MaxIndex = CubeUniqueVertices - 1;
+		auto indexView = createIndexView<index_t>(IndexCount, MaxIndex);
+		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
 		for (uint32_t i=0u; i<6u; ++i)
 		{
 			u[i*6+0] = 4*i+0;
@@ -43,84 +176,42 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 			u[i*6+4] = 4*i+2;
 			u[i*6+5] = 4*i+3;
 		}
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = 23;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
-		});
+		retval->setIndexView(std::move(indexView));
 	}
 
-	constexpr auto CubeUniqueVertices = 24;
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
+
 	// for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats
-	hlsl::vector<int8_t,4>* normals;
-	hlsl::vector<uint8_t,2>* uvs;
+	snorm_normal_t* normals;
+
+	using UvElementT = uint8_t;
+	constexpr auto MaxUvVal = std::numeric_limits<UvElementT>::max();
+	hlsl::vector<UvElementT,2>* uvs;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4,float32_t> aabb;
 			aabb.maxVx = float32_t4(size*0.5f,0.f);
-			aabb.minVx = -aabb.maxVx;
-			retval->visitAABB([aabb](auto& ref)->void
-				{
-					ref.minVx = hlsl::trunc(aabb.minVx);
-					ref.maxVx = hlsl::trunc(aabb.maxVx);
-				}
-			);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32=aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)}
-			});
+			aabb.minVx = - aabb.maxVx;
+
+			auto positionView = createPositionView(CubeUniqueVertices, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*normals));
-			auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4,int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.maxVx = snorm_all_ones;
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8B8A8_SNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto normalView = createSnormNormalView(CubeUniqueVertices, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
+
 		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4,uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t,4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t,4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+      auto uvView = createUvView<UvElementT>(CubeUniqueVertices);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -165,30 +256,31 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 
 	//
 	{
-		const hlsl::vector<int8_t, 3> norm[6] =
+		const snorm_normal_t norm[6] =
 		{
-			hlsl::vector<int8_t,3>(0, 0, 1),
-			hlsl::vector<int8_t,3>(127, 0, 0),
-			hlsl::vector<int8_t,3>(0, 0,-127),
-			hlsl::vector<int8_t,3>(-127, 0, 0),
-			hlsl::vector<int8_t,3>(0, 127, 0),
-			hlsl::vector<int8_t,3>(0,-127, 0)
+			snorm_positive_z,
+			snorm_positive_x,
+			snorm_negative_z,
+			snorm_negative_x,
+			snorm_positive_y,
+			snorm_negative_y
 		};
-		const hlsl::vector<uint8_t, 2> uv[4] =
+		const hlsl::vector<UvElementT, 2> uv[4] =
 		{
-			hlsl::vector<uint8_t,2>(  0,255),
-			hlsl::vector<uint8_t,2>(255,255),
-			hlsl::vector<uint8_t,2>(255,  0),
-			hlsl::vector<uint8_t,2>(  0,  0)
+			hlsl::vector<UvElementT,2>(  0, MaxUvVal),
+			hlsl::vector<UvElementT,2>(MaxUvVal, MaxUvVal),
+			hlsl::vector<UvElementT,2>(MaxUvVal,  0),
+			hlsl::vector<UvElementT,2>(  0,  0)
 		};
-		for (size_t f=0ull; f<6ull; ++f)
+
+		for (size_t f = 0ull; f < 6ull; ++f)
 		{
-			const size_t v = f*4ull;
+			const size_t v = f * 4ull;
 
-			for (size_t i=0ull; i<4ull; ++i)
+			for (size_t i = 0ull; i < 4ull; ++i)
 			{
-				normals[v+i] = vector<int8_t,4>(norm[f],0);
-				uvs[v+i] = uv[i];
+				normals[v + i] = snorm_normal_t(norm[f]);
+				uvs[v + i] = uv[i];
 			}
 		}
 	}
@@ -218,10 +310,11 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 	// Create indices
 	{
 		using index_t = uint32_t;
+
 		const auto indexCount = (polyCountX * polyCountY) * 6;
-		const auto bytesize = sizeof(index_t) * indexCount;
-		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
-		auto indexPtr = reinterpret_cast<index_t*>(indices->getPointer());
+		auto indexView = createIndexView<index_t>(indexCount, vertexCount - 1);
+		auto indexPtr = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
+
 		uint32_t level = 0;
 		size_t indexAddIx = 0;
 		for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1)
@@ -280,18 +373,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		indexPtr[indexAddIx++] = polyCountSqM1;
 		indexPtr[indexAddIx++] = polyCountSq1;
 
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = vertexCount - 1;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u32=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
-		});
+		retval->setIndexView(std::move(indexView));
+
 	}
 
 	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
@@ -299,71 +382,32 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	hlsl::vector<int8_t, 4>* normals;
-	hlsl::vector<uint16_t, 2>* uvs;
+
+	snorm_normal_t* normals;
+
+	using UvElementT = uint16_t;
+	hlsl::vector<UvElementT, 2>* uvs;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4, float32_t> aabb;
 			aabb.maxVx = float32_t4(radius, radius, radius, 0.0f);
 			aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32 = aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {
-					.offset=0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
+			auto positionView = createPositionView(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*normals));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4, int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.maxVx = snorm_all_ones;
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = NormalFormat,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff)
-				}
-			});
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4, uint16_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u16=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
+			auto uvView = createUvView<UvElementT>(vertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -468,9 +512,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	{
 		constexpr uint32_t RowCount = 2u;
 		const auto IndexCount = RowCount * 3 * tesselation;
-		const auto bytesize = sizeof(index_t) * IndexCount;
-		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
-		auto u = reinterpret_cast<index_t*>(indices->getPointer());
+		auto indexView = createIndexView<index_t>(IndexCount, vertexCount - 1);
+		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
+
 		for (uint16_t i = 0u, j = 0u; i < halfIx; ++i)
 		{
 			u[j++] = i;
@@ -481,18 +525,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 			u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx;
 		}
 
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = vertexCount - 1;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
-		});
+		retval->setIndexView(std::move(indexView));
 	}
 
 	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
@@ -500,71 +533,32 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	hlsl::vector<int8_t, 4>* normals;
-	hlsl::vector<uint16_t, 2>* uvs;
+
+	snorm_normal_t* normals;
+
+	using UvElementT = uint16_t;
+	hlsl::vector<UvElementT, 2>* uvs;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4, float32_t> aabb;
 			aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
 			aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32 = aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {
-					.offset=0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
+			auto positionView = createPositionView(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*normals));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4, int8_t> aabb;
 			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = NormalFormat,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff)
-				}
-			});
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4, uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t, 4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t, 4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
+			auto uvView = createUvView<UvElementT>(vertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -612,11 +606,13 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	{
 		constexpr uint32_t RowCount = 2u;
 		const auto IndexCount = 3 * tesselation;
-		const auto bytesize = sizeof(index_t) * IndexCount;
-		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
-		auto u = reinterpret_cast<index_t*>(indices->getPointer());
+
+		auto indexView = createIndexView<index_t>(IndexCount, vertexCount - 1);
+		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
+
 		const uint32_t firstIndexOfBaseVertices = 0;
 		const uint32_t firstIndexOfApexVertices = tesselation;
+
 		for (uint32_t i = 0; i < tesselation; i++)
 		{
 			u[i * 3] = firstIndexOfApexVertices + i;
@@ -624,18 +620,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 			u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
 		}
 
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = vertexCount - 1;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
-		});
+		retval->setIndexView(std::move(indexView));
 	}
 
 	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
@@ -646,46 +631,20 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	hlsl::vector<int8_t, 4>* normals;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4, float32_t> aabb;
 			aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
 			aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32 = aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {
-					.offset=0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
+			auto positionView = createPositionView(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*normals));
-			auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4, int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.maxVx = snorm_all_ones;
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = NormalFormat,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff)
-				}
-			});
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
 	}
 
@@ -779,94 +738,56 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createRectangle(co
 		3---2
 		*/
 		const index_t indices[] = {0,3,1,1,3,2};
-		auto buffer = ICPUBuffer::create({
-			{sizeof(indices),IBuffer::EUF_INDEX_BUFFER_BIT},
-			const_cast<void*>((const void*)indices) // TODO: temporary till two different creation params (adopting needs non const void, copying needs const void only
-		});
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = 3;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=buffer->getSize(),.buffer=std::move(buffer)}
-		});
+		auto indexView = createIndexView<index_t>(std::size(indices), 3);
+		memcpy(indexView.src.buffer->getPointer(), indices, sizeof(indices));
+		retval->setIndexView(std::move(indexView));
 	}
 
+	constexpr auto VertexCount = 4;
 	// Create vertices
 	{
 		{
-			const hlsl::float32_t2 positions[] = {
+			const hlsl::float32_t2 positions[VertexCount] = {
 				hlsl::float32_t2(-size.x, size.y),
 				hlsl::float32_t2( size.x, size.y),
 				hlsl::float32_t2( size.x,-size.y),
 				hlsl::float32_t2(-size.x,-size.y)
 			};
-			auto buff = ICPUBuffer::create({{sizeof(positions),IBuffer::EUF_NONE},(void*)positions});
 			shapes::AABB<4,float32_t> aabb;
 			aabb.minVx = float32_t4(-size,0.f,0.f);
 			aabb.maxVx = float32_t4( size,0.f,0.f);
-			retval->visitAABB([aabb](auto& ref)->void
-				{
-					ref.minVx = hlsl::trunc(aabb.minVx);
-					ref.maxVx = hlsl::trunc(aabb.maxVx);
-				}
-			);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32=aabb},
-					.stride = sizeof(positions[0]),
-					.format = EF_R32G32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)}
-			});
+			auto positionView = createPositionView<2>(VertexCount, aabb);
+			memcpy(positionView.src.buffer->getPointer(), positions, sizeof(positions));
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			const hlsl::vector<int8_t,4> normals[] = {
-				hlsl::vector<int8_t,4>(0,0,127,0),
-				hlsl::vector<int8_t,4>(0,0,127,0),
-				hlsl::vector<int8_t,4>(0,0,127,0),
-				hlsl::vector<int8_t,4>(0,0,127,0)
+			const hlsl::vector<int8_t,4> normals[VertexCount] = {
+				snorm_positive_z,
+				snorm_positive_z,
+				snorm_positive_z,
+				snorm_positive_z,
 			};
-			auto buff = ICPUBuffer::create({{sizeof(normals),IBuffer::EUF_NONE},(void*)normals});
 			shapes::AABB<4,int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(0,0,127,0);
-			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = sizeof(normals[0]),
-					.format = EF_R8G8B8A8_SNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			aabb.maxVx = snorm_positive_z;
+			aabb.minVx = snorm_normal_t(0, 0, 0, 0);
+			auto normalView = createSnormNormalView(VertexCount, aabb);
+			memcpy(normalView.src.buffer->getPointer(), normals, sizeof(normals));
+			retval->setNormalView(std::move(normalView));
 		}
 		{
-			const hlsl::vector<uint8_t,2> uvs[] = {
-				hlsl::vector<uint8_t,2>(  0,255),
-				hlsl::vector<uint8_t,2>(255,255),
-				hlsl::vector<uint8_t,2>(255,  0),
-				hlsl::vector<uint8_t,2>(  0,  0)
+			using UvElementT = uint8_t;
+			constexpr auto MaxUvVal = std::numeric_limits<UvElementT>::max();
+			const hlsl::vector<UvElementT, 2> uvsData[VertexCount] = {
+				hlsl::vector<UvElementT,2>(  0, MaxUvVal),
+				hlsl::vector<UvElementT,2>(MaxUvVal, MaxUvVal),
+				hlsl::vector<UvElementT,2>(MaxUvVal,  0),
+				hlsl::vector<UvElementT,2>(  0,  0)
 			};
-			auto buff = ICPUBuffer::create({{sizeof(uvs),IBuffer::EUF_NONE},(void*)uvs});
-			shapes::AABB<4,uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t,4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t,4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = sizeof(uvs[0]),
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			hlsl::vector<UvElementT, 2>* uvs;
+			auto uvView = createUvView<UvElementT>(VertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			memcpy(uvs, uvsData, sizeof(uvsData));
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -889,68 +810,36 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 	const size_t vertexCount = 2u + tesselation;
 
 	float32_t2* positions;
+
 	// for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats
-	hlsl::vector<int8_t,4>* normals;
+	snorm_normal_t* normals;
 	//
-	constexpr uint16_t UnityUV = 0xffffu;
-	uint16_t2* uvs;
+	using UvElementT = uint16_t;
+	constexpr uint16_t UnityUV = std::numeric_limits<UvElementT>::max();
+	hlsl::vector<UvElementT, 2>* uvs;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4,float32_t> aabb;
-			aabb.maxVx = float32_t4(radius,radius,0.f,0.f);
+			aabb.maxVx = float32_t4(radius,radius, 0.f, 0.f);
 			aabb.minVx = -aabb.maxVx;
-			retval->visitAABB([aabb](auto& ref)->void
-				{
-					ref.minVx = hlsl::trunc(aabb.minVx);
-					ref.maxVx = hlsl::trunc(aabb.maxVx);
-				}
-			);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32=aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)}
-			});
+			auto positionView = createPositionView<2>(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
 			constexpr auto AttrSize = sizeof(decltype(*normals));
 			auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4,int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(0,0,127,0);
+			aabb.maxVx = snorm_positive_z;
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8B8A8_SNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4,uint16_t> aabb;
-			aabb.minVx = uint16_t4(0,0,0,0);
-			aabb.maxVx = uint16_t4(UnityUV,UnityUV,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u16=aabb},
-					.stride = AttrSize,
-					.format = EF_R16G16_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto uvView = createUvView<UvElementT>(vertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -1955,50 +1844,19 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 
 	// Create indices
 	{
-		auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() });
-		memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize());
-
-		shapes::AABB<4,Icosphere::index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = icosphere.getPositionCount() - 1;
-
-		static_assert(sizeof(Icosphere::index_t) == 2 || sizeof(Icosphere::index_t) == 4);
-		const auto isIndex16Bit = sizeof(Icosphere::index_t) == 2;
-
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u32=aabb},
-				.stride = sizeof(Icosphere::index_t),
-				.format = isIndex16Bit ? EF_R16_UINT : EF_R32_UINT,
-				.rangeFormat = isIndex16Bit? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32
-			},
-			.src = {.offset=0,.size=icosphere.getIndexSize(),.buffer = std::move(indexBuffer)}
-		});
+		auto indexView = createIndexView<Icosphere::index_t>(icosphere.getIndexCount(), icosphere.getPositionCount() - 1);
+		memcpy(indexView.src.buffer->getPointer(), icosphere.getIndices(), icosphere.getIndexSize());
+		retval->setIndexView(std::move(indexView));
 	}
 
 	{
 		{
-			using position_t = float32_t3;
-			constexpr auto AttrSize = sizeof(position_t);
-			auto buff = ICPUBuffer::create({ icosphere.getPositionCount() * AttrSize, IBuffer::EUF_NONE });
-			const auto positions = reinterpret_cast<position_t*>(buff->getPointer());
-			memcpy(positions, icosphere.getPositions(), icosphere.getPositionSize());
 			shapes::AABB<4, float32_t> aabb;
 			aabb.maxVx = float32_t4(radius, radius, radius, 0.f);
 			aabb.minVx = -aabb.maxVx;
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32 = aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {
-					.offset = 0,
-					.size = buff->getSize(),
-					.buffer = std::move(buff),
-				}
-			});
+			auto positionView = createPositionView(icosphere.getPositionCount(), aabb);
+			memcpy(positionView.src.buffer->getPointer(), icosphere.getPositions(), icosphere.getPositionSize());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
 			using normal_t = float32_t3;
@@ -2020,28 +1878,20 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 			});
 		}
 		{
-			using uv_t = uint32_t;
-			constexpr auto AttrSize = sizeof(uv_t);
-			auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE});
-			const auto uvs = reinterpret_cast<uv_t*>(buff->getPointer());
-			shapes::AABB<4, uint16_t> aabb;
-			aabb.minVx = uint16_t4(0,0,0,0);
-			aabb.maxVx = uint16_t4(0xFFFF,0xFFFF,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u16=aabb},
-					.stride = AttrSize,
-					.format = EF_R16G16_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			using UvElementT = uint16_t;
+      hlsl::vector<UvElementT, 2>* uvs;
+			auto uvView = createUvView<UvElementT>(icosphere.getTexCoordCount());
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+
 			for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++)
 			{
 				const auto texCoords = icosphere.getTexCoords();
 				const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] };
-				uvs[uv_i] = packUnorm2x16(f32_uv);
+				const auto u32_uv = packUnorm2x16(f32_uv);
+				memcpy(uvs + uv_i, &u32_uv, sizeof(u32_uv));
 			}
+
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 

From 489e2f2d73fdad25e82cde1c83abd98e7b1eafce Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 21:14:46 +0700
Subject: [PATCH 18/40] Slight type naming improvement in geometry creator

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 62 ++++++++++++------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index f2a9515566..3e59d2f8c6 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -186,9 +186,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	// for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats
 	snorm_normal_t* normals;
 
-	using UvElementT = uint8_t;
-	constexpr auto MaxUvVal = std::numeric_limits<UvElementT>::max();
-	hlsl::vector<UvElementT,2>* uvs;
+	using uv_element_t = uint8_t;
+	constexpr auto MaxUvVal = std::numeric_limits<uv_element_t>::max();
+	hlsl::vector<uv_element_t,2>* uvs;
 	{
 		{
 			shapes::AABB<4,float32_t> aabb;
@@ -209,7 +209,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 		}
 
 		{
-      auto uvView = createUvView<UvElementT>(CubeUniqueVertices);
+      auto uvView = createUvView<uv_element_t>(CubeUniqueVertices);
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
@@ -265,12 +265,12 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 			snorm_positive_y,
 			snorm_negative_y
 		};
-		const hlsl::vector<UvElementT, 2> uv[4] =
+		const hlsl::vector<uv_element_t, 2> uv[4] =
 		{
-			hlsl::vector<UvElementT,2>(  0, MaxUvVal),
-			hlsl::vector<UvElementT,2>(MaxUvVal, MaxUvVal),
-			hlsl::vector<UvElementT,2>(MaxUvVal,  0),
-			hlsl::vector<UvElementT,2>(  0,  0)
+			hlsl::vector<uv_element_t,2>(  0, MaxUvVal),
+			hlsl::vector<uv_element_t,2>(MaxUvVal, MaxUvVal),
+			hlsl::vector<uv_element_t,2>(MaxUvVal,  0),
+			hlsl::vector<uv_element_t,2>(  0,  0)
 		};
 
 		for (size_t f = 0ull; f < 6ull; ++f)
@@ -385,8 +385,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 	snorm_normal_t* normals;
 
-	using UvElementT = uint16_t;
-	hlsl::vector<UvElementT, 2>* uvs;
+	using uv_element_t = uint16_t;
+	hlsl::vector<uv_element_t, 2>* uvs;
 	{
 		{
 			shapes::AABB<4, float32_t> aabb;
@@ -405,7 +405,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 			retval->setNormalView(std::move(normalView));
 		}
 		{
-			auto uvView = createUvView<UvElementT>(vertexCount);
+			auto uvView = createUvView<uv_element_t>(vertexCount);
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
@@ -536,8 +536,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 	snorm_normal_t* normals;
 
-	using UvElementT = uint16_t;
-	hlsl::vector<UvElementT, 2>* uvs;
+	using uv_element_t = uint16_t;
+	hlsl::vector<uv_element_t, 2>* uvs;
 	{
 		{
 			shapes::AABB<4, float32_t> aabb;
@@ -556,7 +556,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 			retval->setNormalView(std::move(normalView));
 		}
 		{
-			auto uvView = createUvView<UvElementT>(vertexCount);
+			auto uvView = createUvView<uv_element_t>(vertexCount);
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
@@ -775,16 +775,16 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createRectangle(co
 			retval->setNormalView(std::move(normalView));
 		}
 		{
-			using UvElementT = uint8_t;
-			constexpr auto MaxUvVal = std::numeric_limits<UvElementT>::max();
-			const hlsl::vector<UvElementT, 2> uvsData[VertexCount] = {
-				hlsl::vector<UvElementT,2>(  0, MaxUvVal),
-				hlsl::vector<UvElementT,2>(MaxUvVal, MaxUvVal),
-				hlsl::vector<UvElementT,2>(MaxUvVal,  0),
-				hlsl::vector<UvElementT,2>(  0,  0)
+			using uv_element_t = uint8_t;
+			constexpr auto MaxUvVal = std::numeric_limits<uv_element_t>::max();
+			const hlsl::vector<uv_element_t, 2> uvsData[VertexCount] = {
+				hlsl::vector<uv_element_t,2>(  0, MaxUvVal),
+				hlsl::vector<uv_element_t,2>(MaxUvVal, MaxUvVal),
+				hlsl::vector<uv_element_t,2>(MaxUvVal,  0),
+				hlsl::vector<uv_element_t,2>(  0,  0)
 			};
-			hlsl::vector<UvElementT, 2>* uvs;
-			auto uvView = createUvView<UvElementT>(VertexCount);
+			hlsl::vector<uv_element_t, 2>* uvs;
+			auto uvView = createUvView<uv_element_t>(VertexCount);
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 			memcpy(uvs, uvsData, sizeof(uvsData));
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));
@@ -814,9 +814,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 	// for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats
 	snorm_normal_t* normals;
 	//
-	using UvElementT = uint16_t;
-	constexpr uint16_t UnityUV = std::numeric_limits<UvElementT>::max();
-	hlsl::vector<UvElementT, 2>* uvs;
+	using uv_element_t = uint16_t;
+	constexpr uint16_t UnityUV = std::numeric_limits<uv_element_t>::max();
+	hlsl::vector<uv_element_t, 2>* uvs;
 	{
 		{
 			shapes::AABB<4,float32_t> aabb;
@@ -837,7 +837,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 			retval->setNormalView(std::move(normalView));
 		}
 		{
-			auto uvView = createUvView<UvElementT>(vertexCount);
+			auto uvView = createUvView<uv_element_t>(vertexCount);
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
@@ -1878,9 +1878,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 			});
 		}
 		{
-			using UvElementT = uint16_t;
-      hlsl::vector<UvElementT, 2>* uvs;
-			auto uvView = createUvView<UvElementT>(icosphere.getTexCoordCount());
+			using uv_element_t = uint16_t;
+      hlsl::vector<uv_element_t, 2>* uvs;
+			auto uvView = createUvView<uv_element_t>(icosphere.getTexCoordCount());
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 
 			for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++)

From de023238681df1b44a3f6754a85d89dada4f5631 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 21:29:23 +0700
Subject: [PATCH 19/40] Fix bug-prone constant

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 3e59d2f8c6..a867bf1277 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -187,7 +187,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	snorm_normal_t* normals;
 
 	using uv_element_t = uint8_t;
-	constexpr auto MaxUvVal = std::numeric_limits<uv_element_t>::max();
+	constexpr auto UnityUV = std::numeric_limits<uv_element_t>::max();
 	hlsl::vector<uv_element_t,2>* uvs;
 	{
 		{
@@ -267,9 +267,9 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 		};
 		const hlsl::vector<uv_element_t, 2> uv[4] =
 		{
-			hlsl::vector<uv_element_t,2>(  0, MaxUvVal),
-			hlsl::vector<uv_element_t,2>(MaxUvVal, MaxUvVal),
-			hlsl::vector<uv_element_t,2>(MaxUvVal,  0),
+			hlsl::vector<uv_element_t,2>(  0, UnityUV),
+			hlsl::vector<uv_element_t,2>(UnityUV, UnityUV),
+			hlsl::vector<uv_element_t,2>(UnityUV,  0),
 			hlsl::vector<uv_element_t,2>(  0,  0)
 		};
 
@@ -378,7 +378,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 	}
 
 	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
-	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
@@ -386,6 +385,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 	snorm_normal_t* normals;
 
 	using uv_element_t = uint16_t;
+	constexpr auto UnityUV = std::numeric_limits<uv_element_t>::max();
+
 	hlsl::vector<uv_element_t, 2>* uvs;
 	{
 		{
@@ -474,14 +475,14 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 		// the vertex at the top of the sphere
 		positions[vertex_i] = { 0.f, radius, 0.f };
-		uvs[vertex_i] = { 0, 63};
+		uvs[vertex_i] = { 0, UnityUV / 2};
 		const auto quantizedTopNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::float32_t3(0.f, 1.f, 0.f));
 		memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal));
 
 		// the vertex at the bottom of the sphere
 		vertex_i++;
 		positions[vertex_i] = { 0.f, -radius, 0.f };
-		uvs[vertex_i] = { 63, 127};
+		uvs[vertex_i] = { UnityUV / 2, UnityUV};
 		const auto quantizedBottomNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::float32_t3(0.f, -1.f, 0.f));
 		memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal));
 	}
@@ -529,7 +530,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	}
 
 	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
-	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
@@ -628,7 +628,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	hlsl::vector<int8_t, 4>* normals;
+	snorm_normal_t* normals;
 	{
 		{
 			shapes::AABB<4, float32_t> aabb;
@@ -860,7 +860,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 			*(uvs++) = uint16_t2(t*UnityUV+0.5f,0);
 		}
 	}
-	std::fill_n(normals,vertexCount,hlsl::vector<int8_t,4>(0,0,127,0));
+	std::fill_n(normals,vertexCount, snorm_positive_z);
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;

From 1964b274c9d2e14f66394a08d2bf5461c83c5b01 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 21:45:58 +0700
Subject: [PATCH 20/40] Accept tesselation as uint16_t parameter

---
 include/nbl/asset/utils/CGeometryCreator.h |  8 ++--
 src/nbl/asset/utils/CGeometryCreator.cpp   | 52 +++-------------------
 2 files changed, 10 insertions(+), 50 deletions(-)

diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h
index 1852b0f033..54f2664bc7 100644
--- a/include/nbl/asset/utils/CGeometryCreator.h
+++ b/include/nbl/asset/utils/CGeometryCreator.h
@@ -58,8 +58,8 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param colorCone color of the cone
 		\return Generated mesh.
 		*/
-		core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> createArrow(const uint32_t tesselationCylinder = 4,
-				const uint32_t tesselationCone = 8, const float height = 1.f,
+		core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> createArrow(const uint16_t tesselationCylinder = 4,
+				const uint16_t tesselationCone = 8, const float height = 1.f,
 				const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f,
 				const float widthCone = 0.3f) const;
 
@@ -85,7 +85,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\return Generated mesh.
 		*/
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createCylinder(float radius, float length,
-				uint32_t tesselation,
+				uint16_t tesselation,
 				CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
 
 		//! Create a cone mesh.
@@ -98,7 +98,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param oblique (to be documented)
 		\return Generated mesh.
 		*/
-		core::smart_refctd_ptr<ICPUPolygonGeometry> createCone(float radius, float length, uint32_t tesselation,
+		core::smart_refctd_ptr<ICPUPolygonGeometry> createCone(float radius, float length, uint16_t tesselation,
 				float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
 
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const;
diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index a867bf1277..125554c88c 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -493,13 +493,13 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	float radius, float length,
-	uint32_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const
+	uint16_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const
 {
 	using namespace hlsl;
 
 	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
-	const auto halfIx = static_cast<uint16_t>(tesselation);
+	const auto halfIx = tesselation;
 	const uint32_t u32_vertexCount = 2 * tesselation;
 	if (u32_vertexCount > std::numeric_limits<uint16_t>::max())
 		return nullptr;
@@ -585,7 +585,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 }
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
-	float radius, float length, uint32_t tesselation,
+	float radius, float length, uint16_t tesselation,
 	float oblique, CQuantNormalCache* const quantNormalCacheOverride) const
 {
 
@@ -690,8 +690,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 }
 
 core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> CGeometryCreator::createArrow(
-	const uint32_t tesselationCylinder,
-	const uint32_t tesselationCone,
+	const uint16_t tesselationCylinder,
+	const uint16_t tesselationCone,
 	const float height,
 	const float cylinderHeight,
 	const float width0,
@@ -880,7 +880,7 @@ class Icosphere
 public:
 	using index_t = unsigned int;
 
-	Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32)
+	Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth)
 	{
 		if (smooth)
 			buildVerticesSmooth();
@@ -909,12 +909,6 @@ class Icosphere
 	const unsigned int* getIndices() const { return indices.data(); }
 	const unsigned int* getLineIndices() const { return lineIndices.data(); }
 
-	// for interleaved vertices: V/N/T
-	unsigned int getInterleavedVertexCount() const { return getPositionCount(); }    // # of vertices
-	unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); }    // # of bytes
-	int getInterleavedStride() const { return interleavedStride; }   // should be 32 bytes
-	const float* getInterleavedVertices() const { return interleavedVertices.data(); }
-
 protected:
 
 private:
@@ -1092,11 +1086,6 @@ class Icosphere
 			vertices[i] *= scale;
 			vertices[i + 1] *= scale;
 			vertices[i + 2] *= scale;
-
-			// for interleaved array
-			interleavedVertices[j] *= scale;
-			interleavedVertices[j + 1] *= scale;
-			interleavedVertices[j + 2] *= scale;
 		}
 	}
 
@@ -1260,9 +1249,6 @@ class Icosphere
 
 		// subdivide icosahedron
 		subdivideVerticesFlat();
-
-		// generate interleaved vertex array as well
-		buildInterleavedVertices();
 	}
 
 	/*
@@ -1485,8 +1471,6 @@ class Icosphere
 		// subdivide icosahedron
 		subdivideVerticesSmooth();
 
-		// generate interleaved vertex array as well
-		buildInterleavedVertices();
 	}
 	/*
 		divide a trinage into 4 sub triangles and repeat N times
@@ -1662,27 +1646,6 @@ class Icosphere
 		stride must be 32 bytes
 	*/
 
-	void buildInterleavedVertices()
-	{
-		core::vector<float>().swap(interleavedVertices);
-
-		std::size_t i, j;
-		std::size_t count = vertices.size();
-		for (i = 0, j = 0; i < count; i += 3, j += 2)
-		{
-			interleavedVertices.push_back(vertices[i]);
-			interleavedVertices.push_back(vertices[i + 1]);
-			interleavedVertices.push_back(vertices[i + 2]);
-
-			interleavedVertices.push_back(normals[i]);
-			interleavedVertices.push_back(normals[i + 1]);
-			interleavedVertices.push_back(normals[i + 2]);
-
-			interleavedVertices.push_back(texCoords[j]);
-			interleavedVertices.push_back(texCoords[j + 1]);
-		}
-	}
-
 	void addVertex(float x, float y, float z)
 	{
 		vertices.push_back(x);
@@ -1826,9 +1789,6 @@ class Icosphere
 	core::vector<uint32_t> lineIndices;
 	std::map<std::pair<float, float>, uint32_t> sharedIndices;   // indices of shared vertices, key is tex coord (s,t)
 
-	// interleaved
-	core::vector<float> interleavedVertices;
-	uint32_t interleavedStride;											// # of bytes to hop to the next vertex (should be 32 bytes)
 
 };
 

From 215723574a26539e0c234e3583729bd6fed8d012 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 21:49:02 +0700
Subject: [PATCH 21/40] Remove reciprocal_approxim usage

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 125554c88c..e2be673682 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -562,7 +562,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 		}
 	}
 
-	const float tesselationRec = core::reciprocal_approxim<float>(static_cast<float>(tesselation));
+	const float tesselationRec = 1.f / static_cast<float>(tesselation);
 	const float step = 2.f * core::PI<float>() * tesselationRec;
 	for (uint32_t i = 0u; i < tesselation; ++i)
 	{

From 7728987aaef5749fda6cc9abb0ee0df3e279725b Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 22:08:09 +0700
Subject: [PATCH 22/40] use hlsl::numbers instead of constant from core

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index e2be673682..4c0b24a34a 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -450,7 +450,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 				//if (y==0)
 				//{
 				if (normal.y != -1.0f && normal.y != 1.0f)
-					tu = static_cast<float>(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
+					tu = static_cast<float>(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi<float32_t>());
 				if (normal.z < 0.0f)
 					tu = 1 - tu;
 				//}
@@ -458,7 +458,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					//tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
 
 				positions[vertex_i] = pos;
-				uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast<float>(ay * core::RECIPROCAL_PI<double>())) };
+				uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast<float>(ay * numbers::inv_pi<float32_t>())) };
 				memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal));
 
 				vertex_i++;
@@ -563,7 +563,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	}
 
 	const float tesselationRec = 1.f / static_cast<float>(tesselation);
-	const float step = 2.f * core::PI<float>() * tesselationRec;
+	const float step = 2.f * numbers::pi<float32_t> * tesselationRec;
 	for (uint32_t i = 0u; i < tesselation; ++i)
 	{
 		const auto f_i = static_cast<float>(i);

From 53f81af9da454d91d7c1973ce6e3afc921d1e08f Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 22:11:42 +0700
Subject: [PATCH 23/40] Reorder normal calculation so no need to normalize
 position

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 4c0b24a34a..9fe65a5ff2 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -568,8 +568,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	{
 		const auto f_i = static_cast<float>(i);
 		hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
+		const auto n = quantNormalCache->quantize<NormalCacheFormat>(p);
 		p *= radius;
-		const auto n = quantNormalCache->quantize<NormalCacheFormat>(hlsl::normalize(p));
 
 		positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));

From e29bbf9382424219dbfcf145bdb1f5bbd881187d Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 23:47:47 +0700
Subject: [PATCH 24/40] Remove packSnorm

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 29 ++++++++++++++++--------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 9fe65a5ff2..78560aaaa1 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -29,11 +29,19 @@ namespace nbl::asset
 
 		constexpr auto snorm_all_ones = hlsl::vector<int8_t, 4>(snorm_one, snorm_one, snorm_one, snorm_one);
 
-}
+    template <typename ElementT>
+      requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
+		constexpr E_FORMAT get_uv_format()
+    {
+      if constexpr(std::is_same_v<ElementT, uint8_t>)
+      {
+				return EF_R8G8_UNORM;
+      } else
+      {
+				return EF_R16G16_UNORM;
+      }
+    }
 
-static uint8_t packSnorm(float val)
-{
-	return round(hlsl::clamp(val, -1.0f, 1.0f) * 127);
 }
 
 template <typename ElementT>
@@ -61,13 +69,13 @@ static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount)
 	if constexpr(std::is_same_v<ElementT, uint8_t>)
 	{
 		retval.composed.encodedDataRange.u8 = aabb;
-		retval.composed.format = EF_R8G8_UNORM;
+		retval.composed.format = get_uv_format<ElementT>();
 		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM;
 	}
 	else if constexpr(std::is_same_v<ElementT, uint16_t>)
 	{
 		retval.composed.encodedDataRange.u16 = aabb;
-		retval.composed.format = EF_R16G16_UNORM;
+		retval.composed.format = get_uv_format<ElementT>();
 		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM;
 	}
 
@@ -458,7 +466,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					//tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
 
 				positions[vertex_i] = pos;
-				uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast<float>(ay * numbers::inv_pi<float32_t>())) };
+				float32_t2 f32_uv = { tu, static_cast<float>(ay * numbers::inv_pi<float32_t>()) };
+				encodePixels<get_uv_format<uv_element_t>(), float>(uvs + vertex_i, f32_uv.data.data);
 				memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal));
 
 				vertex_i++;
@@ -537,6 +546,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	snorm_normal_t* normals;
 
 	using uv_element_t = uint16_t;
+	constexpr auto UnityUV = std::numeric_limits<uv_element_t>::max();
 	hlsl::vector<uv_element_t, 2>* uvs;
 	{
 		{
@@ -573,11 +583,12 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 		positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));
-		uvs[i] = { packSnorm(f_i * tesselationRec), packSnorm(0.0) };
+		float32_t2 f32_uv = { f_i * tesselationRec, 0.f };
+		encodePixels<get_uv_format<uv_element_t>(), float>(uvs + i, f32_uv.data.data);
 
 		positions[i + halfIx] = { p.x, p.y, length };
 		normals[i + halfIx] = normals[i];
-		uvs[i + halfIx] = { packSnorm(1.0f), packSnorm(0.0f) };
+		uvs[i + halfIx] = { UnityUV, 0 };
 	}
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());

From b957ca78944c10c1f5a49dcaf139afa52d344beb Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 23:50:19 +0700
Subject: [PATCH 25/40] Small improvement on Icosphere index_t

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 78560aaaa1..3596a9353f 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -889,7 +889,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 class Icosphere
 {
 public:
-	using index_t = unsigned int;
+	using index_t = uint32_t;
 
 	Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth)
 	{

From 75d486d9578731ece3c021c1b9a2b13348bc5250 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 18 Jul 2025 23:52:34 +0700
Subject: [PATCH 26/40] Remove unnecessary method on Icosphere

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 3596a9353f..48a07f0eda 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -901,9 +901,7 @@ class Icosphere
 
 	~Icosphere() {}
 
-	unsigned int getPositionCount() const { return (unsigned int)vertices.size() / 3; }
-	unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; }
-	unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; }
+	unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; }
 	unsigned int getIndexCount() const { return (unsigned int)indices.size(); }
 	unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); }
 	unsigned int getTriangleCount() const { return getIndexCount() / 3; }
@@ -1815,7 +1813,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 
 	// Create indices
 	{
-		auto indexView = createIndexView<Icosphere::index_t>(icosphere.getIndexCount(), icosphere.getPositionCount() - 1);
+		auto indexView = createIndexView<Icosphere::index_t>(icosphere.getIndexCount(), icosphere.getVertexCount() - 1);
 		memcpy(indexView.src.buffer->getPointer(), icosphere.getIndices(), icosphere.getIndexSize());
 		retval->setIndexView(std::move(indexView));
 	}
@@ -1825,7 +1823,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 			shapes::AABB<4, float32_t> aabb;
 			aabb.maxVx = float32_t4(radius, radius, radius, 0.f);
 			aabb.minVx = -aabb.maxVx;
-			auto positionView = createPositionView(icosphere.getPositionCount(), aabb);
+			auto positionView = createPositionView(icosphere.getVertexCount(), aabb);
 			memcpy(positionView.src.buffer->getPointer(), icosphere.getPositions(), icosphere.getPositionSize());
 			retval->setPositionView(std::move(positionView));
 		}
@@ -1851,10 +1849,10 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 		{
 			using uv_element_t = uint16_t;
       hlsl::vector<uv_element_t, 2>* uvs;
-			auto uvView = createUvView<uv_element_t>(icosphere.getTexCoordCount());
+			auto uvView = createUvView<uv_element_t>(icosphere.getVertexCount());
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 
-			for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++)
+			for (auto uv_i = 0u; uv_i < icosphere.getVertexCount(); uv_i++)
 			{
 				const auto texCoords = icosphere.getTexCoords();
 				const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] };

From e0013cbedccffced44918c34bd15a97f48a02079 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Tue, 22 Jul 2025 06:47:16 +0700
Subject: [PATCH 27/40] Fix normal quantization cache

---
 include/nbl/asset/utils/CDirQuantCacheBase.h | 109 ++++++++++++-------
 1 file changed, 69 insertions(+), 40 deletions(-)

diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
index df574d9f3e..302d6ae39b 100644
--- a/include/nbl/asset/utils/CDirQuantCacheBase.h
+++ b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -43,13 +43,13 @@ class CDirQuantCacheBase
 				
 				Vector8u3() : x(0u),y(0u),z(0u) {}
 				Vector8u3(const Vector8u3&) = default;
-				explicit Vector8u3(const hlsl::float32_t3& val)
+				explicit Vector8u3(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u3& operator=(const Vector8u3&) = default;
-				Vector8u3& operator=(const hlsl::float32_t3& val)
+				Vector8u3& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -57,9 +57,9 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
-					return { x, y, z };
+					return { x, y, z, 0 };
 				}
 
 
@@ -75,24 +75,24 @@ class CDirQuantCacheBase
 				
 				Vector8u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector8u4(const Vector8u4&) = default;
-				explicit Vector8u4(const hlsl::float32_t3& val)
+				explicit Vector8u4(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u4& operator=(const Vector8u4&) = default;
-				Vector8u4& operator=(const hlsl::float32_t3& val)
+				Vector8u4& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
-					w = 0;
+					w = val.w;
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
-					return { x, y, z };
+					return { x, y, z, w };
 				}
 				
 			private:
@@ -109,17 +109,16 @@ class CDirQuantCacheBase
 
 				Vector1010102() : storage(0u) {}
 				Vector1010102(const Vector1010102&) = default;
-				explicit Vector1010102(const hlsl::float32_t3& val)
+				explicit Vector1010102(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector1010102& operator=(const Vector1010102&) = default;
-				Vector1010102& operator=(const hlsl::float32_t3& val)
+				Vector1010102& operator=(const hlsl::uint32_t4& val)
 				{
 					constexpr auto storageBits = quantizationBits + 1u;
-					hlsl::uint32_t3 u32_val = { val.x, val.y, val.z };
-					storage = u32_val.x | (u32_val.y << storageBits) | (u32_val.z << (storageBits * 2u));
+					storage = val.x | (val.y << storageBits) | (val.z << (storageBits * 2u));
 					return *this;
 				}
 
@@ -132,11 +131,11 @@ class CDirQuantCacheBase
 					return storage==other.storage;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
 					constexpr auto storageBits = quantizationBits + 1u;
 					const auto mask = (0x1u << storageBits) - 1u;
-					return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask};
+					return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0};
 				}
 
 			private:
@@ -151,13 +150,13 @@ class CDirQuantCacheBase
 				
 				Vector16u3() : x(0u),y(0u),z(0u) {}
 				Vector16u3(const Vector16u3&) = default;
-				explicit Vector16u3(const hlsl::float32_t3& val)
+				explicit Vector16u3(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u3& operator=(const Vector16u3&) = default;
-				Vector16u3& operator=(const hlsl::float32_t3& val)
+				Vector16u3& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -165,9 +164,9 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+        hlsl::uint32_t4 getValue() const
 				{
-					return { x, y, z };
+					return { x, y, z, 0 };
 				}
 
 			private:
@@ -182,24 +181,24 @@ class CDirQuantCacheBase
 
 				Vector16u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector16u4(const Vector16u4&) = default;
-				explicit Vector16u4(const hlsl::float32_t3& val)
+				explicit Vector16u4(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u4& operator=(const Vector16u4&) = default;
-				Vector16u4& operator=(const hlsl::float32_t3& val)
+				Vector16u4& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
 					z = val.z;
-					w = 0;
+					w = val.w;
 					return *this;
 				}
 
-        hlsl::float32_t3 getValue() const
+				hlsl::uint32_t4 getValue() const // integer vector like every other storage type: quantize() applies ^ and & to it
 				{
-					return { x, y, z };
+					return { x, y, z, w };
 				}
 
 			private:
@@ -379,11 +378,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 		std::tuple<cache_type_t<Formats>...> cache;
 		
 		template<uint32_t dimensions, E_FORMAT CacheFormat>
-		value_type_t<CacheFormat> quantize(const hlsl::float32_t3& value)
+		value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
-			const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f));
-
-			const hlsl::float32_t3 absValue = abs(value);
+			auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t, dimensions> src) -> hlsl::float32_t4
+      {
+        if constexpr(dimensions == 1)
+        {
+          return {src.x, 0, 0, 0};
+        } else if constexpr (dimensions == 2)
+        {
+          return {src.x, src.y, 0, 0};
+        } else if constexpr (dimensions == 3)
+        {
+          return {src.x, src.y, src.z, 0};
+        } else if constexpr (dimensions == 4)
+        {
+          return {src.x, src.y, src.z, src.w};
+        }
+      };
+
+			const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector<hlsl::float32_t, dimensions>(0.0f)));
+
+			const hlsl::vector<hlsl::float32_t, dimensions> absValue = abs(value);
 			const auto key = Key(absValue);
 
 			constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -397,18 +413,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				{
 					const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
 
-					quantized = abs(fit);
+					const auto abs_fit = to_float32_t4(abs(fit));
+          quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);
+
 					insertIntoCache<CacheFormat>(key,quantized);
 				}
 			}
 
-			//return quantized.
-			const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? -1 : 1);
-      return value_type_t<CacheFormat>(negativeMulVec * quantized.getValue());
+			auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
+      {
+					hlsl::uint32_t4 retval;
+					retval.x = mask.x ? val2.x : val1.x;
+					retval.y = mask.y ? val2.y : val1.y;
+					retval.z = mask.z ? val2.z : val1.z;
+					retval.w = mask.w ? val2.w : val1.w;
+					return retval;
+      };
+
+      const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
+      auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
+      restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);
+      return value_type_t<CacheFormat>(restoredAsVec & xorflag);
 		}
 
 		template<uint32_t dimensions, uint32_t quantizationBits>
-		static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value)
+		static inline hlsl::vector<hlsl::float32_t, dimensions> findBestFit(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
 			static_assert(dimensions>1u,"No point");
 			static_assert(dimensions<=4u,"High Dimensions are Hard!");
@@ -416,10 +445,10 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 			const auto vectorForDots = hlsl::normalize(value);
 
 			//
-			hlsl::float32_t3 fittingVector;
-			hlsl::float32_t3 floorOffset;
+			hlsl::vector<hlsl::float32_t, dimensions> fittingVector;
+			hlsl::vector<hlsl::float32_t, dimensions> floorOffset;
 			constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
-			hlsl::float32_t3 corners[cornerCount] = {};
+			hlsl::vector<hlsl::float32_t, dimensions> corners[cornerCount] = {};
 			{
 				uint32_t maxDirCompIndex = 0u;
 				for (auto i=1u; i<dimensions; i++)
@@ -431,7 +460,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))
 				{
 					_NBL_DEBUG_BREAK_IF(true);
-					return hlsl::float32_t3(0.f);
+					return hlsl::vector<hlsl::float32_t, dimensions>(0.f);
 				}
 				fittingVector = value / maxDirectionComp;
 				floorOffset[maxDirCompIndex] = 0.499f;
@@ -453,9 +482,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				}
 			}
 
-			hlsl::float32_t3 bestFit;
+			hlsl::vector<hlsl::float32_t, dimensions> bestFit;
 			float closestTo1 = -1.f;
-			auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void
+			auto evaluateFit = [&](const hlsl::vector<hlsl::float32_t, dimensions>& newFit) -> void
 			{
 				auto newFitLen = length(newFit);
 				const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);
@@ -467,7 +496,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 			};
 
 			constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
-			const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize);
+			const hlsl::vector<hlsl::float32_t, dimensions> cubeHalfSizeND = hlsl::vector<hlsl::float32_t, dimensions>(cubeHalfSize);
 			for (uint32_t n=cubeHalfSize; n>0u; n--)
 			{
 				//we'd use float addition in the interest of speed, to increment the loop

From 4afd07208d8c9eaf877d8367f2ab768d6468f049 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Wed, 23 Jul 2025 20:10:05 +0700
Subject: [PATCH 28/40] implement constexpr findLSB

---
 .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 10 +++++--
 .../builtin/hlsl/cpp_compat/intrinsics.hlsl   |  2 +-
 include/nbl/core/util/bitflag.h               |  2 +-
 src/nbl/asset/ECommonEnums.cpp                | 29 +++++--------------
 4 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
index f92dddfb26..8bfed025ce 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
@@ -348,8 +348,14 @@ requires concepts::IntegralScalar<T>
 struct find_lsb_helper<T>
 {
 	using return_t = int32_t;
-	static inline T __call(const T arg)
+	NBL_CONSTEXPR_FUNC static inline T __call(const T arg) // index of the lowest set bit of `arg`, all-ones when `arg` is 0
 	{
+		if (std::is_constant_evaluated()) // must be a plain `if`: as an `if constexpr` condition this is always true
+		{
+			for (T ix = T(0); ix < sizeof(T) * 8; ix++) // stay within the width of T, shifting further is undefined
+				if ((T(1) << ix) & arg) return ix;
+			return ~T(0); // no bit set
+		}
 		return glm::findLSB<T>(arg);
 	}
 };
@@ -369,7 +375,7 @@ requires std::is_enum_v<EnumType>
 struct find_lsb_helper<EnumType>
 {
 	using return_t = int32_t;
-	static int32_t __call(NBL_CONST_REF_ARG(EnumType) val)
+	NBL_CONSTEXPR_FUNC static int32_t __call(NBL_CONST_REF_ARG(EnumType) val)
 	{
 		using underlying_t = std::underlying_type_t<EnumType>;
 		return find_lsb_helper<underlying_t>::__call(static_cast<underlying_t>(val));
diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl
index a5747a5fb7..7198bae563 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl
@@ -68,7 +68,7 @@ inline typename matrix_traits<Matrix>::scalar_type determinant(NBL_CONST_REF_ARG
 }
 
 template<typename T>
-inline typename cpp_compat_intrinsics_impl::find_lsb_helper<T>::return_t findLSB(NBL_CONST_REF_ARG(T) val)
+NBL_CONSTEXPR_FUNC inline typename cpp_compat_intrinsics_impl::find_lsb_helper<T>::return_t findLSB(NBL_CONST_REF_ARG(T) val)
 {
 	return cpp_compat_intrinsics_impl::find_lsb_helper<T>::__call(val);
 }
diff --git a/include/nbl/core/util/bitflag.h b/include/nbl/core/util/bitflag.h
index 1731c0cac3..62bec57d49 100644
--- a/include/nbl/core/util/bitflag.h
+++ b/include/nbl/core/util/bitflag.h
@@ -60,7 +60,7 @@ namespace nbl::hlsl::cpp_compat_intrinsics_impl
 	struct find_lsb_helper<core::bitflag<ENUM_TYPE>>
 	{
 		using return_t = int32_t;
-		static return_t __call(NBL_CONST_REF_ARG(core::bitflag<ENUM_TYPE>) val)
+		NBL_CONSTEXPR_FUNC static return_t __call(NBL_CONST_REF_ARG(core::bitflag<ENUM_TYPE>) val)
 		{
 			return find_lsb_helper<ENUM_TYPE>::__call(val.value);
 		}
diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp
index 0f23b9b3fc..2366b25f99 100644
--- a/src/nbl/asset/ECommonEnums.cpp
+++ b/src/nbl/asset/ECommonEnums.cpp
@@ -3,19 +3,6 @@
 namespace nbl::asset
 {
 
-constexpr static int32_t findLSB(size_t val)
-{
-	if constexpr(std::is_constant_evaluated())
-	{
-		for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-			if ((0x1ull << ix) & val) return ix;
-		return ~0u;
-	} else
-	{
-		return hlsl::findLSB(val);
-	}
-}
-
 core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
 {
 	struct PerStagePreviousStages
@@ -49,7 +36,7 @@ core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STA
 
 			constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags)
 			{
-				const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(stageFlag));
 				data[bitIx] |= previousStageFlags;
 			}
 
@@ -61,7 +48,7 @@ core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STA
 	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
 	while (bool(stages.value))
 	{
-		const auto bitIx = findLSB(static_cast<size_t>(stages.value));
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(stages.value));
 		retval |= bitToAccess[bitIx];
 		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
 	}
@@ -101,7 +88,7 @@ core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_
 
 			constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags)
 			{
-				const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(stageFlag));
 				data[bitIx] |= laterStageFlags;
 			}
 
@@ -113,7 +100,7 @@ core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_
 	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
 	while (bool(stages.value))
 	{
-		const auto bitIx = findLSB(static_cast<size_t>(stages.value));
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(stages.value));
 		retval |= bitToAccess[bitIx];
 		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
 	}
@@ -179,7 +166,7 @@ core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_F
 				
 			constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags)
 			{
-				const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(stageFlag));
 				data[bitIx] = accessFlags;
 			}
 
@@ -193,7 +180,7 @@ core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_F
 	core::bitflag<ACCESS_FLAGS> retval = ACCESS_FLAGS::NONE;
 	while (bool(stages.value))
 	{
-		const auto bitIx = findLSB(static_cast<size_t>(stages.value));
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(stages.value));
 		retval |= bitToAccess[bitIx];
 		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
 	}
@@ -259,7 +246,7 @@ core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_F
 		private:
 			constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags)
 			{
-				const auto bitIx = findLSB(static_cast<size_t>(accessFlags));
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(accessFlags));
 				data[bitIx] = stageFlags;
 			}
 
@@ -270,7 +257,7 @@ core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_F
 	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
 	while (bool(accesses.value))
 	{
-		const auto bitIx = findLSB(static_cast<size_t>(accesses.value));
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(accesses.value));
 		retval |= bitToStage[bitIx];
 		accesses ^= static_cast<ACCESS_FLAGS>(0x1u<<bitIx);
 	}

From a001472a7c3877a463f8df7f572e2c533ee0389a Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Wed, 23 Jul 2025 20:10:29 +0700
Subject: [PATCH 29/40] Remove unused include

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 108 +++++++----------------
 1 file changed, 32 insertions(+), 76 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 48a07f0eda..ed1788f543 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -5,13 +5,11 @@
 
 #include "nbl/asset/utils/CGeometryCreator.h"
 #include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/math/linalg/transform.hlsl"
 
-#include <iostream>
-#include <iomanip>
 #include <cmath>
 #include <cstdint>
 
-
 namespace nbl::asset
 {
 
@@ -159,6 +157,12 @@ static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount,
   };
 }
 
+static void encodeUv(hlsl::vector<uint16_t, 2>* uvDst, hlsl::float32_t2 uvSrc)
+{
+  uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc);
+  memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2);
+}
+
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const hlsl::float32_t3 size) const
 {
 	using namespace hlsl;
@@ -458,7 +462,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 				//if (y==0)
 				//{
 				if (normal.y != -1.0f && normal.y != 1.0f)
-					tu = static_cast<float>(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi<float32_t>());
+					tu = static_cast<float>(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi<float32_t>);
 				if (normal.z < 0.0f)
 					tu = 1 - tu;
 				//}
@@ -466,8 +470,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					//tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
 
 				positions[vertex_i] = pos;
-				float32_t2 f32_uv = { tu, static_cast<float>(ay * numbers::inv_pi<float32_t>()) };
-				encodePixels<get_uv_format<uv_element_t>(), float>(uvs + vertex_i, f32_uv.data.data);
+				encodeUv(uvs + vertex_i, float32_t2(tu, static_cast<float>(ay* numbers::inv_pi<float32_t>)));
 				memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal));
 
 				vertex_i++;
@@ -476,7 +479,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 			// This is the doubled vertex on the initial position
 
 			positions[vertex_i] = positions[old_vertex_i];
-			uvs[vertex_i] = { 127, uvs[old_vertex_i].y };
+			uvs[vertex_i] = { UnityUV, uvs[old_vertex_i].y };
 			normals[vertex_i] = normals[old_vertex_i];
 
 			vertex_i++;
@@ -583,12 +586,11 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 
 		positions[i] = { p.x, p.y, p.z };
 		memcpy(normals + i, &n, sizeof(n));
-		float32_t2 f32_uv = { f_i * tesselationRec, 0.f };
-		encodePixels<get_uv_format<uv_element_t>(), float>(uvs + i, f32_uv.data.data);
+		encodeUv(uvs + i, float32_t2(f_i * tesselationRec, 0.f));
 
 		positions[i + halfIx] = { p.x, p.y, length };
 		normals[i + halfIx] = normals[i];
-		uvs[i + halfIx] = { UnityUV, 0 };
+		uvs[i + halfIx] = { 1.f * tesselationRec, UnityUV };
 	}
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
@@ -602,9 +604,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 
 	using namespace hlsl;
 
-	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
-
-	const uint32_t u32_vertexCount = 2 * tesselation;
+	const uint32_t u32_vertexCount = tesselation + 1;
 	if (u32_vertexCount > std::numeric_limits<uint16_t>::max())
 		return nullptr;
 	const auto vertexCount = static_cast<uint16_t>(u32_vertexCount);
@@ -615,31 +615,25 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	// Create indices
 	using index_t = uint16_t;
 	{
-		constexpr uint32_t RowCount = 2u;
 		const auto IndexCount = 3 * tesselation;
 
 		auto indexView = createIndexView<index_t>(IndexCount, vertexCount - 1);
 		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
 
-		const uint32_t firstIndexOfBaseVertices = 0;
-		const uint32_t firstIndexOfApexVertices = tesselation;
+		const uint32_t apexVertexIndex = tesselation;
 
 		for (uint32_t i = 0; i < tesselation; i++)
 		{
-			u[i * 3] = firstIndexOfApexVertices + i;
-			u[(i * 3) + 1] = firstIndexOfBaseVertices + i;
-			u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
+			u[i * 3] = apexVertexIndex;
+			u[(i * 3) + 1] = i;
+			u[(i * 3) + 2] = i == (tesselation - 1) ? 0 : i + 1;
 		}
 
 		retval->setIndexView(std::move(indexView));
 	}
 
-	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
-	constexpr auto NormalFormat = EF_R8G8B8A8_SNORM;
-
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
-	snorm_normal_t* normals;
 	{
 		{
 			shapes::AABB<4, float32_t> aabb;
@@ -649,14 +643,6 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
 			retval->setPositionView(std::move(positionView));
 		}
-		{
-			shapes::AABB<4, int8_t> aabb;
-			aabb.maxVx = snorm_all_ones;
-			aabb.minVx = -aabb.maxVx;
-			auto normalView = createSnormNormalView(vertexCount, aabb);
-			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
-			retval->setNormalView(std::move(normalView));
-		}
 	}
 
 	const float step = (2.f*core::PI<float>()) / tesselation;
@@ -669,38 +655,15 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 	{
 		hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step));
 		v *= radius;
-
-		positions[i] = { v.x, v.y, v.z };
-		positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z };
-
-		const auto simdPosition = hlsl::float32_t3(positions[i].x, positions[i].y, positions[i].z);
-		const hlsl::float32_t3 v0ToApex = apexVertexCoords - simdPosition;
-
-		uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1;
-		hlsl::float32_t3 u1 = hlsl::float32_t3(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z);
-		u1 -= simdPosition;
-		float angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u1)));
-		u1 = hlsl::normalize(hlsl::cross(v0ToApex, u1)) * angleWeight;
-
-		uint32_t prevVertexIndex = i == 0 ? (tesselation - 1) : i - 1;
-		hlsl::float32_t3 u2 = hlsl::float32_t3(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z);
-		u2 -= simdPosition;
-		angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u2)));
-		u2 = hlsl::normalize(hlsl::cross(u2, v0ToApex)) * angleWeight;
-
-
-		const auto baseNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::normalize(u1 + u2));
-		memcpy(normals + i, &baseNormal, sizeof(baseNormal));
-
-		const auto apexNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::normalize(u1));
-		memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal));
+		positions[i] = v;
 	}
+  positions[apexVertexBase_i] = apexVertexCoords;
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
-core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> CGeometryCreator::createArrow(
+core::smart_refctd_ptr<ICPUGeometryCollection> CGeometryCreator::createArrow(
 	const uint16_t tesselationCylinder,
 	const uint16_t tesselationCone,
 	const float height,
@@ -711,25 +674,20 @@ core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> CGeometryCreator::crea
 {
 	assert(height > cylinderHeight);
 
-	using position_t = hlsl::float32_t3;
-
 	auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder);
 	auto cone = createCone(width1, height-cylinderHeight, tesselationCone);
 
-	auto conePositions = reinterpret_cast<position_t*>(cone->getPositionView().src.buffer->getPointer());
-
-	const auto coneVertexCount = cone->getPositionView().getElementCount();
-	
-	for (auto i = 0ull; i < coneVertexCount; ++i)
-	{
-		auto& conePosition = conePositions[i];
-		core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z);
-		newPos.rotateYZByRAD(-1.5707963268);
-
-		conePosition = {newPos.x, newPos.y, newPos.z};
-	}
-
-	return {cylinder, cone};
+	auto collection = core::make_smart_refctd_ptr<ICPUGeometryCollection>();
+	auto* geometries = collection->getGeometries();
+	geometries->push_back({
+		.geometry = cylinder
+  });
+	const auto coneTransform = hlsl::math::linalg::rotation_mat(-1.5707963268f, hlsl::float32_t3(1.f, 0.f, 0.f));
+	geometries->push_back({
+		.transform = hlsl::float32_t3x4(coneTransform),
+		.geometry = cone
+  });
+  return collection;
 
 }
 
@@ -1855,9 +1813,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 			for (auto uv_i = 0u; uv_i < icosphere.getVertexCount(); uv_i++)
 			{
 				const auto texCoords = icosphere.getTexCoords();
-				const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] };
-				const auto u32_uv = packUnorm2x16(f32_uv);
-				memcpy(uvs + uv_i, &u32_uv, sizeof(u32_uv));
+				encodeUv(uvs + uv_i, float32_t2(texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1]));
 			}
 
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));

From f8e837bc885c4ed07bde3bc1836dda0ffb3f0c29 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 25 Jul 2025 12:15:06 +0700
Subject: [PATCH 30/40] Fix createArrow to return ICPUGeometryCollection
 instead of vector of polygon geometry

---
 include/nbl/asset/utils/CGeometryCreator.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h
index 54f2664bc7..52e8fb1495 100644
--- a/include/nbl/asset/utils/CGeometryCreator.h
+++ b/include/nbl/asset/utils/CGeometryCreator.h
@@ -11,6 +11,8 @@
 // legacy, needs to be removed
 #include "SColor.h"
 
+#include "nbl/asset/ICPUGeometryCollection.h"
+
 
 namespace nbl::asset
 {
@@ -58,7 +60,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param colorCone color of the cone
 		\return Generated mesh.
 		*/
-		core::vector<core::smart_refctd_ptr<ICPUPolygonGeometry>> createArrow(const uint16_t tesselationCylinder = 4,
+    core::smart_refctd_ptr<ICPUGeometryCollection> createArrow(const uint16_t tesselationCylinder = 4,
 				const uint16_t tesselationCone = 8, const float height = 1.f,
 				const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f,
 				const float widthCone = 0.3f) const;

From 6552952083cf380330e0733db5b0f3abfa308bdf Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 25 Jul 2025 12:15:21 +0700
Subject: [PATCH 31/40] Add more ray tracing intersection query

---
 .../builtin/hlsl/spirv_intrinsics/raytracing.hlsl | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl
index 3a49450d7c..41f56e225e 100644
--- a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl
+++ b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl
@@ -46,11 +46,21 @@ bool rayQueryProceedKHR([[vk::ext_reference]] RayQueryKHR query);
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionTypeKHR)]]
 int rayQueryGetIntersectionTypeKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
 
+[[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
+[[vk::ext_extension("SPV_KHR_ray_query")]]
+[[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceCustomIndexKHR)]]
+int rayQueryGetIntersectionInstanceCustomIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
+
 [[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
 [[vk::ext_extension("SPV_KHR_ray_query")]]
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceIdKHR)]]
 int rayQueryGetIntersectionInstanceIdKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
 
+[[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
+[[vk::ext_extension("SPV_KHR_ray_query")]]
+[[vk::ext_instruction(spv::OpRayQueryGetIntersectionGeometryIndexKHR)]]
+int rayQueryGetIntersectionGeometryIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
+
 [[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
 [[vk::ext_extension("SPV_KHR_ray_query")]]
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionPrimitiveIndexKHR)]]
@@ -61,6 +71,11 @@ int rayQueryGetIntersectionPrimitiveIndexKHR([[vk::ext_reference]] RayQueryKHR q
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionBarycentricsKHR)]]
 float2 rayQueryGetIntersectionBarycentricsKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
 
+[[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
+[[vk::ext_extension("SPV_KHR_ray_query")]]
+[[vk::ext_instruction(spv::OpRayQueryGetIntersectionFrontFaceKHR)]]
+bool rayQueryGetIntersectionFrontFaceKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
+
 // position fetch for ray tracing uses gl_HitTriangleVertexPositionsEXT -> HitTriangleVertexPositionsKHR decorated OpVariable
 [[vk::ext_builtin_input(spv::BuiltInHitTriangleVertexPositionsKHR)]]
 static const float32_t3 HitTriangleVertexPositionsKHR[3];

From e0b30d0d58aa9de31f2a083f75ec5a4aad9c272c Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 25 Jul 2025 12:15:39 +0700
Subject: [PATCH 32/40] Add transform.hlsl to cmakelists

---
 src/nbl/builtin/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index fcbe58eb41..e3a59b2b50 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -290,6 +290,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format/shared_exp.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl")
 #linear algebra
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl")
 # TODO: rename `equations` to `polynomials` probably
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl")

From c6dd9ac8d0c74db8f4d1564ea85d71cc17667601 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 25 Jul 2025 13:19:29 +0700
Subject: [PATCH 33/40] Transform data minimum alignment fix

---
 include/nbl/asset/IAccelerationStructure.h  | 2 ++
 src/nbl/video/utilities/CAssetConverter.cpp | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h
index 829d10bcd8..6caa63ddfa 100644
--- a/include/nbl/asset/IAccelerationStructure.h
+++ b/include/nbl/asset/IAccelerationStructure.h
@@ -23,6 +23,8 @@ namespace nbl::asset
 class IAccelerationStructure : public virtual core::IReferenceCounted
 {
 	public:
+		static constexpr inline size_t TRANSFORM_DATA_MIN_ALIGNMENT = 16;
+
 		// build flags, we don't expose flags that don't make sense for certain levels
 		enum class BUILD_FLAGS : uint8_t
 		{
diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index d004660e42..bd64912bfc 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -3024,7 +3024,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 									uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
 									if (geom.hasTransform())
 									{
-										size = core::alignUp(size,alignof(float))+sizeof(hlsl::float32_t3x4);
+										size = core::alignUp(size, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT)+sizeof(hlsl::float32_t3x4);
 										alignment = hlsl::max<uint16_t>(alignof(float),alignment);
 									}
 									uint16_t indexSize = 0;
@@ -5061,7 +5061,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 									uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
 									if (geom.hasTransform())
 									{
-										size = core::alignUp(size,alignof(float))+sizeof(hlsl::float32_t3x4);
+										size = core::alignUp(size, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT)+sizeof(hlsl::float32_t3x4);
 										alignment = hlsl::max<uint16_t>(alignof(float),alignment);
 									}
 									uint16_t indexSize = 0u;
@@ -5265,7 +5265,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 									}
 									if (geom.hasTransform())
 									{
-										offset = core::alignUp(offset,alignof(float));
+										offset = core::alignUp(offset, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT);
 										outGeom.transform = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
 										memcpyCallback.data = &geom.transform;
 										if (!streamDataToScratch(offset,sizeof(geom.transform),memcpyCallback))

From 86dae5588894d664a494bbc12c3635783720b29e Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Fri, 25 Jul 2025 17:33:54 +0700
Subject: [PATCH 34/40] Fix after merge with master

---
 src/nbl/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt
index 15c8237585..c567a5b4b7 100755
--- a/src/nbl/CMakeLists.txt
+++ b/src/nbl/CMakeLists.txt
@@ -151,6 +151,7 @@ set(NBL_UI_SOURCES
 )
 set(NBL_ASSET_SOURCES	
 # Assets
+	asset/ECommonEnums.cpp
 	asset/IAsset.cpp
 	asset/IRenderpass.cpp
 	asset/IAssetManager.cpp

From c7caf761e3387dfd019c32bd0bc467b6701508ee Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sat, 26 Jul 2025 17:26:01 +0700
Subject: [PATCH 35/40] Fix uninitialized bug in quantization cache

---
 include/nbl/asset/utils/CDirQuantCacheBase.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
index 302d6ae39b..d8f6f7b28c 100644
--- a/include/nbl/asset/utils/CDirQuantCacheBase.h
+++ b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -496,12 +496,12 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 			};
 
 			constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
 			const hlsl::vector<hlsl::float32_t, dimensions> cubeHalfSizeND = hlsl::vector<hlsl::float32_t, dimensions>(cubeHalfSize);
 			for (uint32_t n=cubeHalfSize; n>0u; n--)
 			{
 				//we'd use float addition in the interest of speed, to increment the loop
 				//but adding a small number to a large one loses precision, so multiplication preferrable
-				const auto bottomFit = floor(fittingVector * float(n) + floorOffset);
+				const auto bottomFit = glm::floor(fittingVector * float(n) + floorOffset);
 				if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND)))
 					evaluateFit(bottomFit);
 				for (auto i = 0u; i < cornerCount; i++)

From ae5a7553c71872283968891183782e77ff0cb14c Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Sat, 26 Jul 2025 17:26:13 +0700
Subject: [PATCH 36/40] Add some comment in quantization cache

---
 include/nbl/asset/utils/CDirQuantCacheBase.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
index d8f6f7b28c..d8e01ed02f 100644
--- a/include/nbl/asset/utils/CDirQuantCacheBase.h
+++ b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -429,11 +429,16 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 					retval.w = mask.w ? val2.w : val1.w;
 					return retval;
       };
-
+;
+      // create all one bits
       const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
+
+      // for positive number xoring with 0 keep its value
+      // for negative number we xor with all one which will flip the bits, then we add one later. Flipping the bits then adding one will turn positive number into negative number
       auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
       restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);
-      return value_type_t<CacheFormat>(restoredAsVec & xorflag);
+
+      return value_type_t<CacheFormat>(restoredAsVec);
 		}
 
 		template<uint32_t dimensions, uint32_t quantizationBits>
@@ -446,7 +451,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 
 			//
 			hlsl::vector<hlsl::float32_t, dimensions> fittingVector;
-			hlsl::vector<hlsl::float32_t, dimensions> floorOffset;
+			hlsl::vector<hlsl::float32_t, dimensions> floorOffset = {};
 			constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
 			hlsl::vector<hlsl::float32_t, dimensions> corners[cornerCount] = {};
 			{

From 02d6c6dc8c9200225f35f8c292fb0edf227503cb Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Mon, 28 Jul 2025 13:59:01 +0700
Subject: [PATCH 37/40] Fix indentation in CGeometryCreator.cpp

---
 src/nbl/asset/utils/CGeometryCreator.cpp | 108 +++++++++++------------
 1 file changed, 54 insertions(+), 54 deletions(-)

diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index ed1788f543..d844dca857 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -18,44 +18,44 @@ namespace nbl::asset
 		using snorm_normal_t = hlsl::vector<int8_t, 4>;
 		constexpr int8_t snorm_one = std::numeric_limits<int8_t>::max();
 		constexpr int8_t snorm_neg_one = std::numeric_limits<int8_t>::min();
-    constexpr auto snorm_positive_x = hlsl::vector<int8_t, 4>(snorm_one, 0, 0, 0);
-    constexpr auto snorm_negative_x = hlsl::vector<int8_t, 4>(snorm_neg_one, 0, 0, 0);
+		constexpr auto snorm_positive_x = hlsl::vector<int8_t, 4>(snorm_one, 0, 0, 0);
+		constexpr auto snorm_negative_x = hlsl::vector<int8_t, 4>(snorm_neg_one, 0, 0, 0);
 		constexpr auto snorm_positive_y = hlsl::vector<int8_t, 4>(0, snorm_one, 0, 0);
 		constexpr auto snorm_negative_y = hlsl::vector<int8_t, 4>(0, snorm_neg_one, 0, 0);
-    constexpr auto snorm_positive_z = hlsl::vector<int8_t, 4>(0, 0, snorm_one, 0);
+		constexpr auto snorm_positive_z = hlsl::vector<int8_t, 4>(0, 0, snorm_one, 0);
 		constexpr auto snorm_negative_z = hlsl::vector<int8_t, 4>(0, 0, snorm_neg_one, 0);
 
 		constexpr auto snorm_all_ones = hlsl::vector<int8_t, 4>(snorm_one, snorm_one, snorm_one, snorm_one);
 
-    template <typename ElementT>
-      requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
+		template <typename ElementT>
+			requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
 		constexpr E_FORMAT get_uv_format()
-    {
-      if constexpr(std::is_same_v<ElementT, uint8_t>)
-      {
+		{
+			if constexpr(std::is_same_v<ElementT, uint8_t>)
+			{
 				return EF_R8G8_UNORM;
-      } else
-      {
+			} else
+			{
 				return EF_R16G16_UNORM;
-      }
-    }
+			}
+		}
 
 }
 
 template <typename ElementT>
-  requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
+	requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
 static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount)
 {
 	const auto elementCount = 2;
 	const auto attrSize = sizeof(ElementT) * elementCount;
-  auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}});
-  hlsl::shapes::AABB<4, ElementT> aabb;
-  aabb.minVx = hlsl::vector<ElementT, 4>(0,0,0,0);
-  aabb.maxVx = hlsl::vector<ElementT, 4>(std::numeric_limits<ElementT>::max(), std::numeric_limits<ElementT>::max(), 0, 0);
+	auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}});
+	hlsl::shapes::AABB<4, ElementT> aabb;
+	aabb.minVx = hlsl::vector<ElementT, 4>(0,0,0,0);
+	aabb.maxVx = hlsl::vector<ElementT, 4>(std::numeric_limits<ElementT>::max(), std::numeric_limits<ElementT>::max(), 0, 0);
 
 	auto retval = ICPUPolygonGeometry::SDataView{
 		.composed = {
-      .stride = attrSize,
+			.stride = attrSize,
 		},
 		.src = {
 			.offset = 0,
@@ -81,22 +81,22 @@ static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount)
 }
 
 template <typename IndexT>
-  requires(std::is_same_v<IndexT, uint16_t> || std::is_same_v<IndexT, uint32_t>)
+	requires(std::is_same_v<IndexT, uint16_t> || std::is_same_v<IndexT, uint32_t>)
 static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex)
 {
-  
-  const auto bytesize = sizeof(IndexT) * indexCount;
-  auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+	
+	const auto bytesize = sizeof(IndexT) * indexCount;
+	auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
 
-  hlsl::shapes::AABB<4,IndexT> aabb;
-  aabb.minVx[0] = 0;
-  aabb.maxVx[0] = maxIndex;
+	hlsl::shapes::AABB<4,IndexT> aabb;
+	aabb.minVx[0] = 0;
+	aabb.maxVx[0] = maxIndex;
 
 	auto retval = ICPUPolygonGeometry::SDataView{
-	  .composed = {
-      .stride = sizeof(IndexT),
-    },
-    .src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)},
+		.composed = {
+			.stride = sizeof(IndexT),
+		},
+		.src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)},
 	};
 
 	if constexpr(std::is_same_v<IndexT, uint16_t>)
@@ -116,20 +116,20 @@ static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t
 }
 
 template <size_t ElementCountV = 3>
-  requires(ElementCountV > 0 && ElementCountV <= 4)
+	requires(ElementCountV > 0 && ElementCountV <= 4)
 static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, const hlsl::shapes::AABB<4, hlsl::float32_t>& aabb)
 {
 	using position_t = hlsl::vector<hlsl::float32_t, ElementCountV>;
 	constexpr auto AttrSize = sizeof(position_t);
-  auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE});
+	auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE});
 
 	constexpr auto format = []()
 	{
-    if constexpr (ElementCountV == 1) return EF_R32_SFLOAT;
-    if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT;
-    if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT;
-    if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT;
-  }();
+		if constexpr (ElementCountV == 1) return EF_R32_SFLOAT;
+		if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT;
+		if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT;
+		if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT;
+	}();
 
 	return {
 		.composed = {
@@ -145,22 +145,22 @@ static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, c
 static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, const hlsl::shapes::AABB<4, int8_t>& aabb)
 {
 	constexpr auto AttrSize = sizeof(snorm_normal_t);
-  auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE});
-  return {
-    .composed = {
-      .encodedDataRange = {.s8=aabb},
-      .stride = AttrSize,
-      .format = EF_R8G8B8A8_SNORM,
-      .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-    },
-    .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-  };
+	auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE});
+	return {
+		.composed = {
+			.encodedDataRange = {.s8=aabb},
+			.stride = AttrSize,
+			.format = EF_R8G8B8A8_SNORM,
+			.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+		},
+		.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
+	};
 }
 
 static void encodeUv(hlsl::vector<uint16_t, 2>* uvDst, hlsl::float32_t2 uvSrc)
 {
-  uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc);
-  memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2);
+	uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc);
+	memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2);
 }
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const hlsl::float32_t3 size) const
@@ -221,7 +221,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 		}
 
 		{
-      auto uvView = createUvView<uv_element_t>(CubeUniqueVertices);
+			auto uvView = createUvView<uv_element_t>(CubeUniqueVertices);
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
@@ -657,7 +657,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
 		v *= radius;
 		positions[i] = v;
 	}
-  positions[apexVertexBase_i] = apexVertexCoords;
+	positions[apexVertexBase_i] = apexVertexCoords;
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
@@ -681,13 +681,13 @@ core::smart_refctd_ptr<ICPUGeometryCollection> CGeometryCreator::createArrow(
 	auto* geometries = collection->getGeometries();
 	geometries->push_back({
 		.geometry = cylinder
-  });
+	});
 	const auto coneTransform = hlsl::math::linalg::rotation_mat(-1.5707963268f, hlsl::float32_t3(1.f, 0.f, 0.f));
 	geometries->push_back({
 		.transform = hlsl::float32_t3x4(coneTransform),
 		.geometry = cone
-  });
-  return collection;
+	});
+	return collection;
 
 }
 
@@ -1806,7 +1806,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(fl
 		}
 		{
 			using uv_element_t = uint16_t;
-      hlsl::vector<uv_element_t, 2>* uvs;
+			hlsl::vector<uv_element_t, 2>* uvs;
 			auto uvView = createUvView<uv_element_t>(icosphere.getVertexCount());
 			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 

From bb45773371627c13adac4e5fb71599ee030df669 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Mon, 28 Jul 2025 14:03:18 +0700
Subject: [PATCH 38/40] Fix indentation of CDirQuantCacheBase.h

---
 include/nbl/asset/utils/CDirQuantCacheBase.h | 60 ++++++++++----------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
index d8e01ed02f..5598364ba6 100644
--- a/include/nbl/asset/utils/CDirQuantCacheBase.h
+++ b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -57,7 +57,7 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-        hlsl::uint32_t4 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
 					return { x, y, z, 0 };
 				}
@@ -90,7 +90,7 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-        hlsl::uint32_t4 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
 					return { x, y, z, w };
 				}
@@ -131,7 +131,7 @@ class CDirQuantCacheBase
 					return storage==other.storage;
 				}
 
-        hlsl::uint32_t4 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
 					constexpr auto storageBits = quantizationBits + 1u;
 					const auto mask = (0x1u << storageBits) - 1u;
@@ -164,7 +164,7 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-        hlsl::uint32_t4 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
 					return { x, y, z, 0 };
 				}
@@ -196,7 +196,7 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-        hlsl::float32_t4 getValue() const
+				hlsl::float32_t4 getValue() const
 				{
 					return { x, y, z, w };
 				}
@@ -381,21 +381,21 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 		value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
 			auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t, dimensions> src) -> hlsl::float32_t4
-      {
-        if constexpr(dimensions == 1)
-        {
-          return {src.x, 0, 0, 0};
-        } else if constexpr (dimensions == 2)
-        {
-          return {src.x, src.y, 0, 0};
-        } else if constexpr (dimensions == 3)
-        {
-          return {src.x, src.y, src.z, 0};
-        } else if constexpr (dimensions == 4)
-        {
-          return {src.x, src.y, src.z, src.w};
-        }
-      };
+			{
+				if constexpr(dimensions == 1)
+				{
+					return {src.x, 0, 0, 0};
+				} else if constexpr (dimensions == 2)
+				{
+					return {src.x, src.y, 0, 0};
+				} else if constexpr (dimensions == 3)
+				{
+					return {src.x, src.y, src.z, 0};
+				} else if constexpr (dimensions == 4)
+				{
+					return {src.x, src.y, src.z, src.w};
+				}
+			};
 
 			const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector<hlsl::float32_t, dimensions>(0.0f)));
 
@@ -414,31 +414,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 					const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
 
 					const auto abs_fit = to_float32_t4(abs(fit));
-          quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);
+					quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);
 
 					insertIntoCache<CacheFormat>(key,quantized);
 				}
 			}
 
 			auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
-      {
+			{
 					hlsl::uint32_t4 retval;
 					retval.x = mask.x ? val2.x : val1.x;
 					retval.y = mask.y ? val2.y : val1.y;
 					retval.z = mask.z ? val2.z : val1.z;
 					retval.w = mask.w ? val2.w : val1.w;
 					return retval;
-      };
+			};
 ;
-      // create all one bits
-      const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
+			// mask with the low (quantizationBits + 1) bits set
+			const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
 
-      // for positive number xoring with 0 keep its value
-      // for negative number we xor with all one which will flip the bits, then we add one later. Flipping the bits then adding one will turn positive number into negative number
-      auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
-      restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);
+			// for a non-negative component, XORing with 0 keeps its value
+			// for a negative component, XORing with xorflag flips those bits and 1 is added below; flipping the bits then adding one is two's complement negation
+			auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask);
+			restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask);
 
-      return value_type_t<CacheFormat>(restoredAsVec);
+			return value_type_t<CacheFormat>(restoredAsVec);
 		}
 
 		template<uint32_t dimensions, uint32_t quantizationBits>

From 1bedf2dc2acbbf4e36f39f8b7f9bd85158100998 Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Mon, 28 Jul 2025 14:04:31 +0700
Subject: [PATCH 39/40] Fix indentation of CGeometryCreator.h

---
 include/nbl/asset/utils/CGeometryCreator.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h
index 52e8fb1495..bd5281cde3 100644
--- a/include/nbl/asset/utils/CGeometryCreator.h
+++ b/include/nbl/asset/utils/CGeometryCreator.h
@@ -60,7 +60,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param colorCone color of the cone
 		\return Generated mesh.
 		*/
-    core::smart_refctd_ptr<ICPUGeometryCollection> createArrow(const uint16_t tesselationCylinder = 4,
+		core::smart_refctd_ptr<ICPUGeometryCollection> createArrow(const uint16_t tesselationCylinder = 4,
 				const uint16_t tesselationCone = 8, const float height = 1.f,
 				const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f,
 				const float widthCone = 0.3f) const;

From 5de6b84b35226333db92b6da17add9d045d847ca Mon Sep 17 00:00:00 2001
From: kevyuu <kevin.kayu@gmail.com>
Date: Wed, 30 Jul 2025 18:06:07 +0700
Subject: [PATCH 40/40] Move creationFlags to cached params

---
 include/nbl/asset/IRayTracingPipeline.h     | 26 ++++++++++-----------
 include/nbl/video/IGPUComputePipeline.h     |  4 ++++
 include/nbl/video/IGPUGraphicsPipeline.h    |  4 ++++
 include/nbl/video/IGPURayTracingPipeline.h  | 18 +++++++-------
 include/nbl/video/ILogicalDevice.h          |  2 +-
 src/nbl/video/CVulkanLogicalDevice.cpp      |  2 +-
 src/nbl/video/ILogicalDevice.cpp            |  4 ++--
 src/nbl/video/utilities/CAssetConverter.cpp |  8 +++----
 8 files changed, 38 insertions(+), 30 deletions(-)

diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h
index b97d8d7002..c0d8d98ce5 100644
--- a/include/nbl/asset/IRayTracingPipeline.h
+++ b/include/nbl/asset/IRayTracingPipeline.h
@@ -14,18 +14,6 @@ namespace nbl::asset
 class IRayTracingPipelineBase : public virtual core::IReferenceCounted
 {
   public:
-    struct SCachedCreationParams final
-    {
-      uint32_t maxRecursionDepth : 6 = 0;
-      uint32_t dynamicStackSize : 1 = false;
-    };
-};
-
-template<typename PipelineLayoutType>
-class IRayTracingPipeline : public IPipeline<PipelineLayoutType>, public IRayTracingPipelineBase
-{
-  public:
-
     #define base_flag(F) static_cast<uint64_t>(IPipelineBase::FLAGS::F)
     enum class CreationFlags : uint64_t
     {
@@ -43,7 +31,19 @@ class IRayTracingPipeline : public IPipeline<PipelineLayoutType>, public IRayTra
       ALLOW_MOTION = 1<<20,
     };
     #undef base_flag
-    using FLAGS = CreationFlags;
+
+    struct SCachedCreationParams final
+    {
+      core::bitflag<CreationFlags> flags = CreationFlags::NONE;
+      uint32_t maxRecursionDepth : 6 = 0;
+      uint32_t dynamicStackSize : 1 = false;
+    };
+};
+
+template<typename PipelineLayoutType>
+class IRayTracingPipeline : public IPipeline<PipelineLayoutType>, public IRayTracingPipelineBase
+{
+  public:
 
     inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; }
 
diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h
index c7343c131a..9854725cd1 100644
--- a/include/nbl/video/IGPUComputePipeline.h
+++ b/include/nbl/video/IGPUComputePipeline.h
@@ -74,6 +74,10 @@ class IGPUComputePipeline : public IGPUPipeline<asset::IComputePipeline<const IG
                 return {};
             }
 
+            inline core::bitflag<FLAGS>& getFlags() { return flags; }
+
+            inline core::bitflag<FLAGS> getFlags() const { return flags; }
+
             const IGPUPipelineLayout* layout = nullptr;
             // TODO: Could guess the required flags from SPIR-V introspection of declared caps
             core::bitflag<FLAGS> flags = FLAGS::NONE;
diff --git a/include/nbl/video/IGPUGraphicsPipeline.h b/include/nbl/video/IGPUGraphicsPipeline.h
index e5dc7c5d7b..79e1337787 100644
--- a/include/nbl/video/IGPUGraphicsPipeline.h
+++ b/include/nbl/video/IGPUGraphicsPipeline.h
@@ -87,6 +87,10 @@ class IGPUGraphicsPipeline : public IGPUPipeline<asset::IGraphicsPipeline<const
                 return stages;
             }
 
+            inline core::bitflag<FLAGS>& getFlags() { return flags; }
+
+            inline core::bitflag<FLAGS> getFlags() const { return flags; }
+
             const IGPUPipelineLayout* layout = nullptr;
             SShaderSpecInfo vertexShader;
             SShaderSpecInfo tesselationControlShader;
diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h
index ecdc529542..816cc68243 100644
--- a/include/nbl/video/IGPURayTracingPipeline.h
+++ b/include/nbl/video/IGPURayTracingPipeline.h
@@ -24,7 +24,7 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
 
         struct SCreationParams : public SPipelineCreationParams<const IGPURayTracingPipeline>
         {
-            using FLAGS = pipeline_t::FLAGS;
+            using FLAGS = IRayTracingPipelineBase::CreationFlags;
 
             struct SShaderGroupsParams
             {
@@ -45,8 +45,6 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
             SShaderGroupsParams shaderGroups;
 
             SCachedCreationParams cached = {};
-            // TODO: Could guess the required flags from SPIR-V introspection of declared caps
-            core::bitflag<FLAGS> flags = FLAGS::NONE;
 
             inline SSpecializationValidationResult valid() const
             {
@@ -76,7 +74,7 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
                     }
 
                     // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03470
-                    if (flags.hasFlags(FLAGS::NO_NULL_ANY_HIT_SHADERS) && !shaderGroup.anyHit.shader)
+                    if (cached.flags.hasFlags(FLAGS::NO_NULL_ANY_HIT_SHADERS) && !shaderGroup.anyHit.shader)
                         return {};
 
                     if (shaderGroup.anyHit.shader) 
@@ -86,7 +84,7 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
                     }
 
                     // https://docs.vulkan.org/spec/latest/chapters/pipelines.html#VUID-VkRayTracingPipelineCreateInfoKHR-flags-03471
-                    if (flags.hasFlags(FLAGS::NO_NULL_CLOSEST_HIT_SHADERS) && !shaderGroup.intersection.shader)
+                    if (cached.flags.hasFlags(FLAGS::NO_NULL_CLOSEST_HIT_SHADERS) && !shaderGroup.closestHit.shader)
                         return {};
                 }
 
@@ -137,6 +135,10 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
                 return stages;
             }
 
+            inline core::bitflag<FLAGS>& getFlags() { return cached.flags; }
+
+            inline core::bitflag<FLAGS> getFlags() const { return cached.flags; }
+
         };
 
         struct SShaderGroupHandle
@@ -153,7 +155,7 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
             uint16_t intersection;
         };
 
-        inline core::bitflag<SCreationParams::FLAGS> getCreationFlags() const { return m_flags; }
+        inline core::bitflag<SCreationParams::FLAGS> getCreationFlags() const { return getCachedCreationParams().flags; }
 
         // Vulkan: const VkPipeline*
         virtual const void* getNativeHandle() const = 0;
@@ -170,13 +172,11 @@ class IGPURayTracingPipeline :  public IGPUPipeline<asset::IRayTracingPipeline<c
         virtual uint16_t getDefaultStackSize() const = 0;
 
     protected:
-        IGPURayTracingPipeline(const SCreationParams& params) : IGPUPipeline(core::smart_refctd_ptr<const ILogicalDevice>(params.layout->getOriginDevice()), params.layout, params.cached),
-            m_flags(params.flags)
+        IGPURayTracingPipeline(const SCreationParams& params) : IGPUPipeline(core::smart_refctd_ptr<const ILogicalDevice>(params.layout->getOriginDevice()), params.layout, params.cached)
         {}
 
         virtual ~IGPURayTracingPipeline() = default;
 
-        const core::bitflag<SCreationParams::FLAGS> m_flags;
 };
 
 }
diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h
index 6298afeb27..180342e2d4 100644
--- a/include/nbl/video/ILogicalDevice.h
+++ b/include/nbl/video/ILogicalDevice.h
@@ -1258,7 +1258,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe
                     }
                 }
                 // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkComputePipelineCreateInfo.html#VUID-VkComputePipelineCreateInfo-flags-07985
-                else if (ci.basePipelineIndex < -1 || ci.basePipelineIndex >= i || ci.basePipelineIndex >= 0 && !params[ci.basePipelineIndex].flags.hasFlags(AllowDerivativesFlag))
+                else if (ci.basePipelineIndex < -1 || ci.basePipelineIndex >= i || ci.basePipelineIndex >= 0 && !params[ci.basePipelineIndex].getFlags().hasFlags(AllowDerivativesFlag))
                 {
                     NBL_LOG_ERROR("Invalid basePipeline was specified (params[%d])", i);
                     return {};
diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp
index 86eaa4fd51..e1c5d89da8 100644
--- a/src/nbl/video/CVulkanLogicalDevice.cpp
+++ b/src/nbl/video/CVulkanLogicalDevice.cpp
@@ -1132,7 +1132,7 @@ template<typename VkPipelineCreateInfo_t, typename SCreationParams>
 void initPipelineCreateInfo(VkPipelineCreateInfo_t* vk_info, const SCreationParams& info)
 {
     // the new flags type (64bit) is only available with maintenance5
-    vk_info->flags = static_cast<VkPipelineCreateFlags>(info.flags.value);
+    vk_info->flags = static_cast<VkPipelineCreateFlags>(info.getFlags().value);
     vk_info->layout = static_cast<const CVulkanPipelineLayout*>(info.layout)->getInternalObject();
     if (info.isDerivative())
     {
diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp
index a21e00e303..c1b4174541 100644
--- a/src/nbl/video/ILogicalDevice.cpp
+++ b/src/nbl/video/ILogicalDevice.cpp
@@ -1025,8 +1025,8 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline
 
     for (const auto& param : params)
     {
-        const bool skipAABBs = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS);
-        const bool skipBuiltin = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES);
+        const bool skipAABBs = bool(param.getFlags() & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS);
+        const bool skipBuiltin = bool(param.getFlags() & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES);
 
         if (!features.rayTracingPipeline)
         {
diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index 935c8bf4aa..b0eb0a23d5 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -3676,6 +3676,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 								hitGroups[hitGroup_i].intersection = GPUShaderSpecInfo::create(visitor.hitGroups.intersections[hitGroup_i], &intersectionEntryMaps[hitGroup_i]);
 							}
 							params.shaderGroups.hits = hitGroups;
+							params.cached = asset->getCachedCreationParams();
 
 							using RayTracingFlags = IGPURayTracingPipeline::SCreationParams::FLAGS;
 							const auto isNullSpecInfo = [](const ICPUPipelineBase::SShaderSpecInfo& specInfo)
@@ -3686,19 +3687,18 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 								visitor.misses.begin(), 
 								visitor.misses.end(), 
 								isNullSpecInfo);
-							if (noNullMiss) params.flags |= RayTracingFlags::NO_NULL_MISS_SHADERS;
+							if (noNullMiss) params.cached.flags |= RayTracingFlags::NO_NULL_MISS_SHADERS;
 							const auto noNullClosestHit = std::none_of(
 								visitor.hitGroups.closestHits.begin(), 
 								visitor.hitGroups.closestHits.end(),
 								isNullSpecInfo);
-							if (noNullClosestHit) params.flags |= RayTracingFlags::NO_NULL_CLOSEST_HIT_SHADERS;
+							if (noNullClosestHit) params.cached.flags |= RayTracingFlags::NO_NULL_CLOSEST_HIT_SHADERS;
 							const auto noNullAnyHit = std::none_of(
 								visitor.hitGroups.anyHits.begin(),
 								visitor.hitGroups.anyHits.end(),
 								isNullSpecInfo);
-							if (noNullAnyHit) params.flags |= RayTracingFlags::NO_NULL_ANY_HIT_SHADERS;
+							if (noNullAnyHit) params.cached.flags |= RayTracingFlags::NO_NULL_ANY_HIT_SHADERS;
 
-							params.cached = asset->getCachedCreationParams();
 							device->createRayTracingPipelines(inputs.pipelineCache, {&params, 1}, &ppln);
 							conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln));
 						}