From 4a37ba53b067bba3f04333423299fd4a34080086 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 25 Jun 2025 18:29:12 +0700 Subject: [PATCH 01/40] Implement createCylinder, createCone, createSphere, CreateArrow --- src/nbl/asset/utils/CGeometryCreator.cpp | 1010 +++++++++++++++------- 1 file changed, 684 insertions(+), 326 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index c5c6ac6765..c25a222a53 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -15,6 +15,11 @@ namespace nbl::asset { +static uint8_t packSnorm(float val) +{ + return round(hlsl::clamp(val, -1.0f, 1.0f) * 127); +} + core::smart_refctd_ptr CGeometryCreator::createCube(const hlsl::float32_t3 size) const { using namespace hlsl; @@ -156,7 +161,7 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h { const hlsl::vector norm[6] = { - hlsl::vector(0, 0, 127), + hlsl::vector(0, 0, 1), hlsl::vector(127, 0, 0), hlsl::vector(0, 0,-127), hlsl::vector(-127, 0, 0), @@ -186,121 +191,12 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h return retval; } -#if 0 - -/* - a cylinder, a cone and a cross - point up on (0,1.f, 0.f ) -*/ -core::smart_refctd_ptr CGeometryCreator::createArrow( - const uint32_t tesselationCylinder, - const uint32_t tesselationCone, - const float height, - const float cylinderHeight, - const float width0, - const float width1, - const video::SColor vtxColor0, - const video::SColor vtxColor1 -) const +core::smart_refctd_ptr CGeometryCreator::createSphere(float radius, + uint32_t polyCountX, uint32_t polyCountY, CQuantNormalCache* const quantNormalCacheOverride) const { - assert(height > cylinderHeight); - - auto cylinder = createCylinderMesh(width0, cylinderHeight, tesselationCylinder, vtxColor0); - auto cone = createConeMesh(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1); - - auto cylinderVertices = 
reinterpret_cast(cylinder.bindings[0].buffer->getPointer()); - auto coneVertices = reinterpret_cast(cone.bindings[0].buffer->getPointer()); - - auto cylinderIndecies = reinterpret_cast(cylinder.indexBuffer.buffer->getPointer()); - auto coneIndecies = reinterpret_cast(cone.indexBuffer.buffer->getPointer()); - - const auto cylinderVertexCount = cylinder.bindings[0].buffer->getSize() / sizeof(CylinderVertex); - const auto coneVertexCount = cone.bindings[0].buffer->getSize() / sizeof(ConeVertex); - const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount; - - const auto cylinderIndexCount = cylinder.indexBuffer.buffer->getSize() / sizeof(uint16_t); - const auto coneIndexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t); - const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount; - - for (auto i = 0ull; i < coneVertexCount; ++i) - { - core::vector3df_SIMD newPos = coneVertices[i].pos; - newPos.rotateYZByRAD(-1.5707963268); - - for (auto c = 0; c < 3; ++c) - coneVertices[i].pos[c] = newPos[c]; - } - - auto newArrowVertexBuffer = asset::ICPUBuffer::create({ newArrowVertexCount * sizeof(ArrowVertex) }); - newArrowVertexBuffer->setUsageFlags(newArrowVertexBuffer->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - auto newArrowIndexBuffer = asset::ICPUBuffer::create({ newArrowIndexCount * sizeof(uint16_t) }); - newArrowIndexBuffer->setUsageFlags(newArrowIndexBuffer->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - - for (auto z = 0ull; z < newArrowVertexCount; ++z) - { - auto arrowVertex = reinterpret_cast(newArrowVertexBuffer->getPointer()) + z; - - if (z < cylinderVertexCount) - { - auto cylinderVertex = (cylinderVertices + z); - memcpy(arrowVertex, cylinderVertex, sizeof(ArrowVertex)); - } - else - { - auto coneVertex = (coneVertices + z - cylinderVertexCount); - memcpy(arrowVertex, coneVertex, offsetof(ConeVertex, normal)); // copy position and color - arrowVertex->uv[0] = 0; - arrowVertex->uv[1] = 0; - 
arrowVertex->normal = coneVertex->normal; - } - } - - { - auto ArrowIndices = reinterpret_cast(newArrowIndexBuffer->getPointer()); - auto newConeIndices = (ArrowIndices + cylinderIndexCount); - - memcpy(ArrowIndices, cylinderIndecies, sizeof(uint16_t) * cylinderIndexCount); - memcpy(newConeIndices, coneIndecies, sizeof(uint16_t) * coneIndexCount); - - for (auto i = 0ull; i < coneIndexCount; ++i) - *(newConeIndices + i) += cylinderVertexCount; - } - - return_type arrow; - - constexpr size_t vertexSize = sizeof(ArrowVertex); - arrow.inputParams = - { 0b1111u,0b1u, - { - {0u,EF_R32G32B32_SFLOAT,offsetof(ArrowVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(ArrowVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(ArrowVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ArrowVertex,normal)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; - - arrow.bindings[0] = { 0, std::move(newArrowVertexBuffer) }; - arrow.indexBuffer = { 0, std::move(newArrowIndexBuffer) }; - arrow.indexCount = newArrowIndexCount; - arrow.indexType = EIT_16BIT; - - return arrow; -} + using namespace hlsl; -/* A sphere with proper normals and texture coords */ -core::smart_refctd_ptr CGeometryCreator::createSphere(float radius, uint32_t polyCountX, uint32_t polyCountY, IMeshManipulator* const meshManipulatorOverride) const -{ - // we are creating the sphere mesh here. - return_type retval; - constexpr size_t vertexSize = sizeof(CGeometryCreator::SphereVertex); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? 
defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); - retval.inputParams = { 0b1111u,0b1u,{ - {0u,EF_R32G32B32_SFLOAT,offsetof(SphereVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(SphereVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(SphereVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(SphereVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; if (polyCountX < 2) polyCountX = 2; @@ -308,15 +204,20 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float polyCountY = 2; const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level + const size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - retval.indexCount = (polyCountX * polyCountY) * 6; - auto indices = asset::ICPUBuffer::create({ sizeof(uint32_t) * retval.indexCount }); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); // Create indices + using index_t = uint32_t; { - uint32_t level = 0; + const auto indexCount = (polyCountX * polyCountY) * 6; + const auto bytesize = sizeof(index_t) * indexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto indexPtr = reinterpret_cast(indices->getPointer()); + uint32_t level = 0; size_t indexAddIx = 0; - uint32_t* indexPtr = (uint32_t*)indices->getPointer(); for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1) { //main quads, top to bottom @@ -372,23 +273,123 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float indexPtr[indexAddIx++] = polyCountSqM1 + polyCountX - 1; indexPtr[indexAddIx++] = polyCountSqM1; indexPtr[indexAddIx++] = polyCountSq1; + + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = vertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u32=aabb}, + .stride = 
sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); } - indices->setUsageFlags(indices->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - retval.indexBuffer = {0ull, std::move(indices)}; - // handle vertices + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + hlsl::vector* normals; + hlsl::vector* uvs; + hlsl::vector* colors; + { + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, radius, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*uvs)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + uvs = 
reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*colors)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + colors = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + } + + // fill vertices { - size_t vertexSize = 3 * 4 + 4 + 2 * 4 + 4; - size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - auto vtxBuf = asset::ICPUBuffer::create({ vertexCount * vertexSize }); - auto* tmpMem = reinterpret_cast(vtxBuf->getPointer()); for (size_t i = 0; i < vertexCount; i++) { - tmpMem[i * vertexSize + 3 * 4 + 0] = 255; - tmpMem[i * vertexSize + 3 * 4 + 1] = 255; - tmpMem[i * vertexSize + 3 * 4 + 2] = 255; - tmpMem[i * vertexSize + 3 * 4 + 3] = 255; + colors[i] = { 255,255,255,255 }; } + // calculate the angle which separates all points in a circle const float AngleX = 2 * core::PI() / polyCountX; const float AngleY = core::PI() / polyCountY; @@ -398,258 +399,615 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // we don't start at 0. 
double ay = 0;//AngleY / 2; - - using quant_normal_t = CQuantNormalCache::value_type_t; - uint8_t* tmpMemPtr = tmpMem; - for (uint32_t y = 0; y < polyCountY; ++y) - { - ay += AngleY; - const double sinay = sin(ay); - axz = 0; - - // calculate the necessary vertices without the doubled one - uint8_t* oldTmpMemPtr = tmpMemPtr; - for (uint32_t xz = 0; xz < polyCountX; ++xz) - { - // calculate points position - - float32_t3 pos(static_cast(cos(axz) * sinay), - static_cast(cos(ay)), - static_cast(sin(axz) * sinay)); - // for spheres the normal is the position - core::vectorSIMDf normal(&pos.X); - normal.makeSafe3D(); - quant_normal_t quantizedNormal = quantNormalCache->quantize(normal); - pos *= radius; - - // calculate texture coordinates via sphere mapping - // tu is the same on each level, so only calculate once - float tu = 0.5f; - //if (y==0) - //{ - if (normal.Y != -1.0f && normal.Y != 1.0f) - tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); - if (normal.Z < 0.0f) - tu = 1 - tu; - //} - //else - //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; - - ((float*)tmpMemPtr)[0] = pos.X; - ((float*)tmpMemPtr)[1] = pos.Y; - ((float*)tmpMemPtr)[2] = pos.Z; - ((float*)tmpMemPtr)[4] = tu; - ((float*)tmpMemPtr)[5] = static_cast(ay * core::RECIPROCAL_PI()); - ((quant_normal_t*)tmpMemPtr)[6] = quantizedNormal; - static_assert(sizeof(quant_normal_t)==4u); - - tmpMemPtr += vertexSize; - axz += AngleX; - } - // This is the doubled vertex on the initial position - - ((float*)tmpMemPtr)[0] = ((float*)oldTmpMemPtr)[0]; - ((float*)tmpMemPtr)[1] = ((float*)oldTmpMemPtr)[1]; - ((float*)tmpMemPtr)[2] = ((float*)oldTmpMemPtr)[2]; - ((float*)tmpMemPtr)[4] = 1.f; - ((float*)tmpMemPtr)[5] = ((float*)oldTmpMemPtr)[5]; - ((uint32_t*)tmpMemPtr)[6] = ((uint32_t*)oldTmpMemPtr)[6]; - tmpMemPtr += vertexSize; - } + auto vertex_i = 0; + for (uint32_t y = 0; y < polyCountY; ++y) + { + ay += AngleY; + const double sinay = sin(ay); + axz = 0; + + 
// calculate the necessary vertices without the doubled one + const auto old_vertex_i = vertex_i; + for (uint32_t xz = 0; xz < polyCountX; ++xz) + { + // calculate points position + + float32_t3 pos(static_cast(cos(axz) * sinay), + static_cast(cos(ay)), + static_cast(sin(axz) * sinay)); + // for spheres the normal is the position + core::vectorSIMDf normal(&pos.x); + normal.makeSafe3D(); + const auto quantizedNormal = quantNormalCache->quantize(normal); + pos *= radius; + + // calculate texture coordinates via sphere mapping + // tu is the same on each level, so only calculate once + float tu = 0.5f; + //if (y==0) + //{ + if (normal.Y != -1.0f && normal.Y != 1.0f) + tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); + if (normal.Z < 0.0f) + tu = 1 - tu; + //} + //else + //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; + + positions[vertex_i] = pos; + uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast(ay * core::RECIPROCAL_PI())) }; + memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); + + vertex_i++; + axz += AngleX; + } + // This is the doubled vertex on the initial position + + positions[vertex_i] = positions[old_vertex_i]; + uvs[vertex_i] = { 127, uvs[old_vertex_i].y }; + normals[vertex_i] = normals[old_vertex_i]; + + vertex_i++; + } // the vertex at the top of the sphere - ((float*)tmpMemPtr)[0] = 0.f; - ((float*)tmpMemPtr)[1] = radius; - ((float*)tmpMemPtr)[2] = 0.f; - ((float*)tmpMemPtr)[4] = 0.5f; - ((float*)tmpMemPtr)[5] = 0.f; - ((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize(core::vectorSIMDf(0.f, 1.f, 0.f)); + positions[vertex_i] = { 0.f, radius, 0.f }; + uvs[vertex_i] = { 0, 63}; + const auto quantizedTopNormal = quantNormalCache->quantize(core::vectorSIMDf(0.f, 1.f, 0.f)); + memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); // the vertex at the bottom of the sphere - tmpMemPtr += vertexSize; - ((float*)tmpMemPtr)[0] = 0.f; - 
((float*)tmpMemPtr)[1] = -radius; - ((float*)tmpMemPtr)[2] = 0.f; - ((float*)tmpMemPtr)[4] = 0.5f; - ((float*)tmpMemPtr)[5] = 1.f; - ((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize(core::vectorSIMDf(0.f, -1.f, 0.f)); - - // recalculate bounding box - core::aabbox3df BoundingBox; - BoundingBox.reset(float32_t3(radius)); - BoundingBox.addInternalPoint(-radius, -radius, -radius); - - // set vertex buffer - vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - retval.bindings[0] = { 0ull,std::move(vtxBuf) }; - retval.indexType = asset::EIT_32BIT; - retval.bbox = BoundingBox; + vertex_i++; + positions[vertex_i] = { 0.f, -radius, 0.f }; + uvs[vertex_i] = { 63, 127}; + const auto quantizedBottomNormal = quantNormalCache->quantize(core::vectorSIMDf(0.f, -1.f, 0.f)); + memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -/* A cylinder with proper normals and texture coords */ core::smart_refctd_ptr CGeometryCreator::createCylinder( float radius, float length, - uint32_t tesselation, const video::SColor& color, IMeshManipulator* const meshManipulatorOverride -) const + uint32_t tesselation, const video::SColor& color, CQuantNormalCache* const quantNormalCacheOverride) const { - return_type retval; - constexpr size_t vertexSize = sizeof(CGeometryCreator::CylinderVertex); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? 
defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); - retval.inputParams = { 0b1111u,0b1u,{ - {0u,EF_R32G32B32_SFLOAT,offsetof(CylinderVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(CylinderVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(CylinderVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(CylinderVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; - - const size_t vtxCnt = 2u*tesselation; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt*sizeof(CylinderVertex) }); - - CylinderVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); - for (auto i=0ull; i(tesselation); - const float step = 2.f*core::PI()*tesselationRec; - for (uint32_t i = 0u; iquantize(core::normalize(p)); - - memcpy(vertices[i].pos, p.pointer, 12u); - vertices[i].normal = n; - memcpy(vertices[i].color, glcolor, 4u); - vertices[i].uv[0] = float(i) * tesselationRec; - - vertices[i+halfIx] = vertices[i]; - vertices[i+halfIx].pos[2] = length; - vertices[i+halfIx].uv[1] = 1.f; - } + using namespace hlsl; + + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; + + const uint16_t halfIx = static_cast(tesselation); + const uint16_t vertexCount = 2 * static_cast(tesselation); - constexpr uint32_t rows = 2u; - retval.indexCount = rows * 3u * tesselation; - auto idxBuf = asset::ICPUBuffer::create({ retval.indexCount *sizeof(uint16_t) }); - uint16_t* indices = (uint16_t*)idxBuf->getPointer(); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); - for (uint32_t i = 0u, j = 0u; i < halfIx; ++i) + // Create indices + using index_t = uint16_t; + { + constexpr uint32_t RowCount = 2u; + const auto IndexCount = RowCount * 3 * tesselation; + const auto bytesize = sizeof(index_t) * IndexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto u = reinterpret_cast(indices->getPointer()); + for (uint16_t i = 0u, j = 0u; i < halfIx; ++i) { - indices[j++] = i; - indices[j++] = (i+1u)!=halfIx ? (i+1u):0u; - indices[j++] = i+halfIx; - indices[j++] = i+halfIx; - indices[j++] = (i+1u)!=halfIx ? (i+1u):0u; - indices[j++] = (i+1u)!=halfIx ? (i+1u+halfIx):halfIx; + u[j++] = i; + u[j++] = (i + 1u) != halfIx ? (i + 1u):0u; + u[j++] = i + halfIx; + u[j++] = i + halfIx; + u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u; + u[j++] = (i + 1u)!= halfIx ? 
(i + 1u + halfIx) : halfIx; } - // set vertex buffer - idxBuf->setUsageFlags(idxBuf->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - retval.indexBuffer = { 0ull, std::move(idxBuf) }; - vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - retval.bindings[0] = { 0ull, std::move(vtxBuf) }; - retval.indexType = asset::EIT_16BIT; - //retval.bbox = ?; + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = vertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); + } + + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + hlsl::vector* normals; + hlsl::vector* uvs; + hlsl::vector* colors; + { + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + 
.composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*uvs)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + uvs = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*colors)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + colors = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + } + + uint8_t glcolor[4]; + color.toOpenGLColor(glcolor); + + const float tesselationRec = core::reciprocal_approxim(static_cast(tesselation)); + const float step = 2.f * core::PI() * tesselationRec; + for (uint32_t i = 0u; i < tesselation; ++i) + { + const auto f_i = static_cast(i); + core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f); + p *= radius; + const auto n = quantNormalCache->quantize(core::normalize(p)); + + positions[i] = { p.x, p.y, p.z }; + memcpy(normals + i, &n, sizeof(n)); + uvs[i] = { f_i * tesselationRec, 
0.0 }; + colors[i] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] }; + + positions[i + halfIx] = { p.x, p.y, length }; + normals[i + halfIx] = normals[i]; + uvs[i + halfIx] = { 1.0f, 0.0f }; + colors[i + halfIx] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] }; + } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -/* A cone with proper normals and texture coords */ core::smart_refctd_ptr CGeometryCreator::createCone( - float radius, float length, uint32_t tesselation, - const video::SColor& colorTop, - const video::SColor& colorBottom, - float oblique, - IMeshManipulator* const meshManipulatorOverride -) const + float radius, float length, uint32_t tesselation, + const video::SColor& colorTop, + const video::SColor& colorBottom, + float oblique, CQuantNormalCache* const quantNormalCacheOverride) const { - const size_t vtxCnt = tesselation * 2; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt * sizeof(ConeVertex) }); - ConeVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); - ConeVertex* baseVertices = vertices; - ConeVertex* apexVertices = vertices + tesselation; + using namespace hlsl; - std::fill(vertices,vertices+vtxCnt, ConeVertex(core::vectorSIMDf(0.f),{},colorBottom)); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; - const float step = (2.f*core::PI()) / tesselation; + const uint16_t vertexCount = 2 * static_cast(tesselation); + + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + // Create indices + using index_t = uint16_t; + { + constexpr uint32_t RowCount = 2u; + const auto IndexCount = 3 * tesselation; + const auto bytesize = sizeof(index_t) * IndexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto u = reinterpret_cast(indices->getPointer()); + const uint32_t firstIndexOfBaseVertices = 0; + const uint32_t firstIndexOfApexVertices = tesselation; + for (uint32_t i = 0; i < tesselation; i++) + { + u[i * 3] = firstIndexOfApexVertices + i; + u[(i * 3) + 1] = firstIndexOfBaseVertices + i; + u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + } + + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = vertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); + } + + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + hlsl::vector* normals; + hlsl::vector* colors; + { + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = 
aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*colors)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + colors = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + } + + uint8_t glcolor[4]; + colorBottom.toOpenGLColor(glcolor); + vector vertexBottomColor = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] }; + std::fill_n(colors, vertexCount, vertexBottomColor); + + const float step = (2.f*core::PI()) / tesselation; const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f); - //vertex positions + const auto apexVertexBase_i = tesselation; + for (uint32_t i = 0u; i < tesselation; i++) { core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f); v *= radius; - memcpy(baseVertices[i].pos, v.pointer, sizeof(float) * 3); - memcpy(apexVertices[i].pos, 
apexVertexCoords.pointer, sizeof(float) * 3); - } + positions[i] = { v.x, v.y, v.z }; + positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z }; - //vertex normals - for (uint32_t i = 0; i < tesselation; i++) - { - const core::vectorSIMDf v0ToApex = apexVertexCoords - core::vectorSIMDf(vertices[i].pos[0], vertices[i].pos[1], vertices[i].pos[2]); + const auto simdPosition = core::vectorSIMDf(positions[i].x, positions[i].y, positions[i].z); + const core::vectorSIMDf v0ToApex = apexVertexCoords - simdPosition; uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1; - core::vectorSIMDf u1 = core::vectorSIMDf(baseVertices[nextVertexIndex].pos[0], baseVertices[nextVertexIndex].pos[1], baseVertices[nextVertexIndex].pos[2]); - u1 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]); + core::vectorSIMDf u1 = core::vectorSIMDf(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z); + u1 -= simdPosition; float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x); u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight; uint32_t prevVertexIndex = i == 0 ? 
(tesselation - 1) : i - 1; - core::vectorSIMDf u2 = core::vectorSIMDf(baseVertices[prevVertexIndex].pos[0], baseVertices[prevVertexIndex].pos[1], baseVertices[prevVertexIndex].pos[2]); - u2 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]); + core::vectorSIMDf u2 = core::vectorSIMDf(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z); + u2 -= simdPosition; angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x); u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight; - baseVertices[i].normal = quantNormalCache->quantize(core::normalize(u1 + u2)); - apexVertices[i].normal = quantNormalCache->quantize(core::normalize(u1)); + + const auto baseNormal = quantNormalCache->quantize(core::normalize(u1 + u2)); + memcpy(normals + i, &baseNormal, sizeof(baseNormal)); + + const auto apexNormal = quantNormalCache->quantize(core::normalize(u1)); + memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal)); } - auto idxBuf = asset::ICPUBuffer::create({ 3u * tesselation * sizeof(uint16_t) }); - uint16_t* indices = (uint16_t*)idxBuf->getPointer(); + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; +} - const uint32_t firstIndexOfBaseVertices = 0; - const uint32_t firstIndexOfApexVertices = tesselation; - for (uint32_t i = 0; i < tesselation; i++) +core::smart_refctd_ptr CGeometryCreator::createArrow( + const uint32_t tesselationCylinder, + const uint32_t tesselationCone, + const float height, + const float cylinderHeight, + const float width0, + const float width1, + const video::SColor vtxColor0, + const video::SColor vtxColor1 +) const +{ + assert(height > cylinderHeight); + + using position_t = hlsl::float32_t3; + using normal_t = hlsl::vector; + using uv_t = hlsl::vector; + using color_t = hlsl::vector; + + auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder, vtxColor0); + auto cone = 
createCone(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1); + + auto cylinderPositions = reinterpret_cast(cylinder->getPositionView().src.buffer->getPointer()); + auto conePositions = reinterpret_cast(cone->getPositionView().src.buffer->getPointer()); + + const auto cylinderNormals = reinterpret_cast(cylinder->getNormalView().src.buffer->getPointer()); + const auto coneNormals = reinterpret_cast(cone->getNormalView().src.buffer->getPointer()); + + const auto cylinderUvs = reinterpret_cast(cylinder->getAuxAttributeViews()->front().src.buffer->getPointer()); + const auto coneUvs = reinterpret_cast(cone->getAuxAttributeViews()->front().src.buffer->getPointer()); + + const auto cylinderIndices = cylinder->getIndexView().src.buffer->getPointer(); + const auto coneIndices = cone->getIndexView().src.buffer->getPointer(); + + const auto cylinderVertexCount = cylinder->getPositionView().getElementCount(); + const auto coneVertexCount = cone->getPositionView().getElementCount(); + const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount; + + const auto cylinderIndexCount = cylinder->getVertexReferenceCount(); + const auto coneIndexCount = cone->getVertexReferenceCount(); + const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount; + + using namespace hlsl; + + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + // Create indices + using index_t = uint16_t; { - indices[i * 3] = firstIndexOfApexVertices + i; - indices[(i * 3) + 1] = firstIndexOfBaseVertices + i; - indices[(i * 3) + 2] = i == (tesselation - 1) ? 
firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + const auto bytesize = sizeof(index_t) * newArrowIndexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + auto arrowIndices = reinterpret_cast(indices->getPointer()); + auto newConeIndices = (arrowIndices + cylinderIndexCount); + + memcpy(arrowIndices, cylinderIndices, sizeof(uint16_t) * cylinderIndexCount); + memcpy(newConeIndices, coneIndices, sizeof(uint16_t) * coneIndexCount); + + for (auto i = 0ull; i < coneIndexCount; ++i) + *(newConeIndices + i) += cylinderVertexCount; + + shapes::AABB<4,index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = newArrowVertexCount - 1; + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = sizeof(index_t), + .format = EF_R16_UINT, + .rangeFormat = IGeometryBase::EAABBFormat::U16 + }, + .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} + }); } - return_type cone; + constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; - constexpr size_t vertexSize = sizeof(ConeVertex); - cone.inputParams = - { 0b111u,0b1u, + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + hlsl::vector* normals; + hlsl::vector* colors; + hlsl::vector* uvs; + { + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + //TODO(kevyuu): Calculate arrow aabb + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = 
ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } { - {0u,EF_R32G32B32_SFLOAT,offsetof(ConeVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(ConeVertex,color)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ConeVertex,normal)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; + constexpr auto AttrSize = sizeof(decltype(*uvs)); + auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); + uvs = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*colors)); + auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); + colors = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint8_t> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(255,255,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + } + + for (auto i = 0ull; i < 
coneVertexCount; ++i) + { + auto& conePosition = conePositions[i]; + core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z); + newPos.rotateYZByRAD(-1.5707963268); - vtxBuf->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - cone.bindings[0] = { 0, std::move(vtxBuf) }; - idxBuf->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT); - cone.indexBuffer = { 0, std::move(idxBuf) }; - cone.indexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t); - cone.indexType = EIT_16BIT; + conePosition = {newPos.x, newPos.y, newPos.z}; + } - return cone; + uint8_t cylinderGlColor[4]; + vtxColor0.toOpenGLColor(cylinderGlColor); + + uint8_t coneGlColor[4]; + vtxColor1.toOpenGLColor(coneGlColor); + + for (auto z = 0ull; z < newArrowVertexCount; ++z) + { + if (z < cylinderVertexCount) + { + positions[z] = cylinderPositions[z]; + normals[z] = cylinderNormals[z]; + uvs[z] = cylinderUvs[z]; + colors[z] = { cylinderGlColor[0], cylinderGlColor[1], cylinderGlColor[2], cylinderGlColor[3] }; + } + else + { + const auto cone_i = z - cylinderVertexCount; + positions[z] = conePositions[cone_i]; + normals[z] = coneNormals[cone_i]; + uvs[z] = { 0, 0 }; + colors[z] = { coneGlColor[0], coneGlColor[1], coneGlColor[2], coneGlColor[3] }; + } + } + + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; } -#endif core::smart_refctd_ptr CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const { From 2d8b7c4918fd8c93ba86e0a4f23b32fef2cb8468 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 25 Jun 2025 18:29:46 +0700 Subject: [PATCH 02/40] Implement getIndexType convenience function for IPolygonGeometry --- include/nbl/asset/IPolygonGeometry.h | 40 +++++++++++++++------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h index 4d021c178c..97a6cda7d0 100644 --- a/include/nbl/asset/IPolygonGeometry.h +++ b/include/nbl/asset/IPolygonGeometry.h 
@@ -203,6 +203,26 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG // For User defined semantics inline const core::vector& getAuxAttributeViews() const {return m_auxAttributeViews;} + inline E_INDEX_TYPE getIndexType() const + { + auto indexType = EIT_UNKNOWN; + // disallowed index format + if (base_t::m_indexView) + { + switch (base_t::m_indexView.composed.format) + { + case EF_R16_UINT: + indexType = EIT_16BIT; + break; + case EF_R32_UINT: [[fallthrough]]; + indexType = EIT_32BIT; + break; + default: + break; + } + } + return indexType; + } // Does not set the `transform` or `geometryFlags` fields, because it doesn't care about it. // Also won't set second set of vertex data, opacity mipmaps, etc. @@ -212,30 +232,12 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG // must be a triangle list, but don't want to compare pointers if (m_indexing && m_indexing->knownTopology()==EPT_TRIANGLE_LIST)// && m_indexing->degree() == TriangleList()->degree() && m_indexing->rate() == TriangleList->rate()) { - auto indexType = EIT_UNKNOWN; - // disallowed index format - if (base_t::m_indexView) - { - switch (base_t::m_indexView.composed.format) - { - case EF_R16_UINT: - indexType = EIT_16BIT; - break; - case EF_R32_UINT: [[fallthrough]]; - indexType = EIT_32BIT; - break; - default: - break; - } - if (indexType==EIT_UNKNOWN) - return retval; - } retval.vertexData[0] = base_t::m_positionView.src; retval.indexData = base_t::m_indexView.src; retval.maxVertex = base_t::m_positionView.getElementCount() - 1; retval.vertexStride = base_t::m_positionView.composed.getStride(); retval.vertexFormat = base_t::m_positionView.composed.format; - retval.indexType = indexType; + retval.indexType = getIndexType(); } return retval; } From cdcaae9008dbda95bf5e7241fd3da13415f411db Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 28 Jun 2025 21:51:50 +0700 Subject: [PATCH 03/40] Implement createIcosphere --- src/nbl/asset/utils/CGeometryCreator.cpp | 140 
+++++++++++++++++------ 1 file changed, 106 insertions(+), 34 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index c25a222a53..775d2e2061 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -1211,7 +1211,6 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f return retval; } -#if 0 /* Helpful Icosphere class implementation used to compute and create icopshere's vertices and indecies. @@ -1224,6 +1223,8 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f class Icosphere { public: + using index_t = unsigned int; + Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32) { if (smooth) @@ -1234,27 +1235,27 @@ class Icosphere ~Icosphere() {} - unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; } + unsigned int getPositionCount() const { return (unsigned int)vertices.size() / 3; } unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; } unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; } unsigned int getIndexCount() const { return (unsigned int)indices.size(); } unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); } unsigned int getTriangleCount() const { return getIndexCount() / 3; } - unsigned int getVertexSize() const { return (unsigned int)vertices.size() * sizeof(float); } // # of bytes + unsigned int getPositionSize() const { return (unsigned int)vertices.size() * sizeof(float); } // # of bytes unsigned int getNormalSize() const { return (unsigned int)normals.size() * sizeof(float); } unsigned int getTexCoordSize() const { return (unsigned int)texCoords.size() * sizeof(float); } - unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(unsigned int); } + unsigned int getIndexSize() const { return 
(unsigned int)indices.size() * sizeof(index_t); } unsigned int getLineIndexSize() const { return (unsigned int)lineIndices.size() * sizeof(unsigned int); } - const float* getVertices() const { return vertices.data(); } + const float* getPositions() const { return vertices.data(); } const float* getNormals() const { return normals.data(); } const float* getTexCoords() const { return texCoords.data(); } const unsigned int* getIndices() const { return indices.data(); } const unsigned int* getLineIndices() const { return lineIndices.data(); } // for interleaved vertices: V/N/T - unsigned int getInterleavedVertexCount() const { return getVertexCount(); } // # of vertices + unsigned int getInterleavedVertexCount() const { return getPositionCount(); } // # of vertices unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); } // # of bytes int getInterleavedStride() const { return interleavedStride; } // should be 32 bytes const float* getInterleavedVertices() const { return interleavedVertices.data(); } @@ -2178,38 +2179,109 @@ class Icosphere core::smart_refctd_ptr CGeometryCreator::createIcoSphere(float radius, uint32_t subdivision, bool smooth) const { - Icosphere IcosphereData(radius, subdivision, smooth); - - return_type icosphereGeometry; - constexpr size_t vertexSize = sizeof(IcosphereVertex); + Icosphere icosphere(radius, subdivision, smooth); + + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + using namespace hlsl; + + // Create indices + { + auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() }); + memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize()); + + shapes::AABB<4,Icosphere::index_t> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = icosphere.getPositionCount() - 1; + + static_assert(sizeof(Icosphere::index_t) == 2 || sizeof(Icosphere::index_t) == 4); + const auto isIndex16Bit = 
sizeof(Icosphere::index_t) == 2; + + retval->setIndexView({ + .composed = { + .encodedDataRange = {.u32=aabb}, + .stride = sizeof(Icosphere::index_t), + .format = isIndex16Bit ? EF_R16_UINT : EF_R32_UINT, + .rangeFormat = isIndex16Bit? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32 + }, + .src = {.offset=0,.size=icosphere.getIndexSize(),.buffer = std::move(indexBuffer)} + }); + } - icosphereGeometry.inputParams = - { 0b111u,0b1u, + { { - {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,pos)}, - {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,normals)}, - {0u, EF_R32G32_SFLOAT, offsetof(IcosphereVertex,uv)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; - - auto vertexBuffer = asset::ICPUBuffer::create({ IcosphereData.getInterleavedVertexSize() }); - auto indexBuffer = asset::ICPUBuffer::create({ IcosphereData.getIndexSize() }); - - memcpy(vertexBuffer->getPointer(), IcosphereData.getInterleavedVertices(), vertexBuffer->getSize()); - memcpy(indexBuffer->getPointer(), IcosphereData.getIndices(), indexBuffer->getSize()); - - vertexBuffer->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - icosphereGeometry.bindings[0] = { 0, std::move(vertexBuffer) }; - indexBuffer->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT); - icosphereGeometry.indexBuffer = { 0, std::move(indexBuffer) }; - icosphereGeometry.indexCount = IcosphereData.getIndexCount(); - icosphereGeometry.indexType = EIT_32BIT; - - return icosphereGeometry; + using position_t = float32_t3; + constexpr auto AttrSize = sizeof(position_t); + auto buff = ICPUBuffer::create({ icosphere.getPositionCount() * AttrSize, IBuffer::EUF_NONE }); + const auto positions = reinterpret_cast(buff->getPointer()); + memcpy(positions, icosphere.getPositions(), icosphere.getPositionSize()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, radius, 0.f); + aabb.minVx = -aabb.maxVx; + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = 
aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + using normal_t = float32_t3; + constexpr auto AttrSize = sizeof(normal_t); + auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE}); + const auto normals = reinterpret_cast(buff->getPointer()); + memcpy(normals, icosphere.getNormals(), icosphere.getNormalSize()); + shapes::AABB<4,float32_t> aabb; + aabb.maxVx = float32_t4(1, 1, 1, 0.f); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)}, + }); + } + { + using uv_t = uint32_t; + constexpr auto AttrSize = sizeof(uv_t); + auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE}); + const auto uvs = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, uint16_t> aabb; + aabb.minVx = uint16_t4(0,0,0,0); + aabb.maxVx = uint16_t4(0xFFFF,0xFFFF,0,0); + retval->getAuxAttributeViews()->push_back({ + .composed = { + .encodedDataRange = {.u16=aabb}, + .stride = AttrSize, + .format = EF_R16G16_UNORM, + .rangeFormat = IGeometryBase::EAABBFormat::U16_NORM + }, + .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} + }); + for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++) + { + const auto texCoords = icosphere.getTexCoords(); + const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] }; + uvs[uv_i] = packUnorm2x16(f32_uv); + } + } + } + + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; } -#endif } // end namespace nbl::asset From 2e063d73bc98248fa874187bb306729a2f06485b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 28 Jun 2025 21:52:18 
+0700 Subject: [PATCH 04/40] Remove color parameter from create --- include/nbl/asset/utils/CGeometryCreator.h | 7 +- src/nbl/asset/utils/CGeometryCreator.cpp | 124 +-------------------- 2 files changed, 6 insertions(+), 125 deletions(-) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index 87d7a0ef5e..ca6fff1790 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ b/include/nbl/asset/utils/CGeometryCreator.h @@ -61,8 +61,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted core::smart_refctd_ptr createArrow(const uint32_t tesselationCylinder = 4, const uint32_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, - const float widthCone = 0.3f, const video::SColor colorCylinder = 0xFFFFFFFF, - const video::SColor colorCone = 0xFFFFFFFF) const; + const float widthCone = 0.3f) const; //! Create a sphere mesh. @@ -87,7 +86,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted */ core::smart_refctd_ptr createCylinder(float radius, float length, uint32_t tesselation, - const video::SColor& color=video::SColor(0xffffffff), CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; + CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; //! Create a cone mesh. /** @@ -100,8 +99,6 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \return Generated mesh. 
*/ core::smart_refctd_ptr createCone(float radius, float length, uint32_t tesselation, - const video::SColor& colorTop=video::SColor(0xffffffff), - const video::SColor& colorBottom=video::SColor(0xffffffff), float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; core::smart_refctd_ptr createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const; diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 775d2e2061..c012df8826 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -295,7 +295,6 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float hlsl::float32_t3* positions; hlsl::vector* normals; hlsl::vector* uvs; - hlsl::vector* colors; { { constexpr auto AttrSize = sizeof(decltype(*positions)); @@ -360,36 +359,10 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float } }); } - { - constexpr auto AttrSize = sizeof(decltype(*colors)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - colors = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } } // fill vertices { - for (size_t i = 0; i < vertexCount; i++) - { - colors[i] = { 255,255,255,255 }; - } - // calculate the angle which separates all points in a circle const float AngleX = 2 * core::PI() / polyCountX; const float AngleY = core::PI() / polyCountY; @@ -470,7 +443,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float core::smart_refctd_ptr CGeometryCreator::createCylinder( float radius, float length, - uint32_t tesselation, const 
video::SColor& color, CQuantNormalCache* const quantNormalCacheOverride) const + uint32_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const { using namespace hlsl; @@ -521,7 +494,6 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( hlsl::float32_t3* positions; hlsl::vector* normals; hlsl::vector* uvs; - hlsl::vector* colors; { { constexpr auto AttrSize = sizeof(decltype(*positions)); @@ -586,32 +558,8 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( } }); } - { - constexpr auto AttrSize = sizeof(decltype(*colors)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - colors = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } } - uint8_t glcolor[4]; - color.toOpenGLColor(glcolor); - const float tesselationRec = core::reciprocal_approxim(static_cast(tesselation)); const float step = 2.f * core::PI() * tesselationRec; for (uint32_t i = 0u; i < tesselation; ++i) @@ -624,12 +572,10 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); uvs[i] = { f_i * tesselationRec, 0.0 }; - colors[i] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] }; positions[i + halfIx] = { p.x, p.y, length }; normals[i + halfIx] = normals[i]; uvs[i + halfIx] = { 1.0f, 0.0f }; - colors[i + halfIx] = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] }; } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); @@ -638,8 +584,6 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( core::smart_refctd_ptr CGeometryCreator::createCone( float 
radius, float length, uint32_t tesselation, - const video::SColor& colorTop, - const video::SColor& colorBottom, float oblique, CQuantNormalCache* const quantNormalCacheOverride) const { @@ -689,7 +633,6 @@ core::smart_refctd_ptr CGeometryCreator::createCone( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; hlsl::vector* normals; - hlsl::vector* colors; { { constexpr auto AttrSize = sizeof(decltype(*positions)); @@ -733,34 +676,8 @@ core::smart_refctd_ptr CGeometryCreator::createCone( } }); } - { - constexpr auto AttrSize = sizeof(decltype(*colors)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - colors = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } } - uint8_t glcolor[4]; - colorBottom.toOpenGLColor(glcolor); - vector vertexBottomColor = { glcolor[0], glcolor[1], glcolor[2], glcolor[3] }; - std::fill_n(colors, vertexCount, vertexBottomColor); - const float step = (2.f*core::PI()) / tesselation; const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f); @@ -808,9 +725,7 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( const float height, const float cylinderHeight, const float width0, - const float width1, - const video::SColor vtxColor0, - const video::SColor vtxColor1 + const float width1 ) const { assert(height > cylinderHeight); @@ -818,10 +733,9 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( using position_t = hlsl::float32_t3; using normal_t = hlsl::vector; using uv_t = hlsl::vector; - using color_t = hlsl::vector; - auto cylinder = 
createCylinder(width0, cylinderHeight, tesselationCylinder, vtxColor0); - auto cone = createCone(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1); + auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); + auto cone = createCone(width1, height-cylinderHeight, tesselationCone); auto cylinderPositions = reinterpret_cast(cylinder->getPositionView().src.buffer->getPointer()); auto conePositions = reinterpret_cast(cone->getPositionView().src.buffer->getPointer()); @@ -881,7 +795,6 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; hlsl::vector* normals; - hlsl::vector* colors; hlsl::vector* uvs; { { @@ -948,27 +861,6 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( } }); } - { - constexpr auto AttrSize = sizeof(decltype(*colors)); - auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); - colors = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } } for (auto i = 0ull; i < coneVertexCount; ++i) @@ -980,12 +872,6 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( conePosition = {newPos.x, newPos.y, newPos.z}; } - uint8_t cylinderGlColor[4]; - vtxColor0.toOpenGLColor(cylinderGlColor); - - uint8_t coneGlColor[4]; - vtxColor1.toOpenGLColor(coneGlColor); - for (auto z = 0ull; z < newArrowVertexCount; ++z) { if (z < cylinderVertexCount) @@ -993,7 +879,6 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( positions[z] = cylinderPositions[z]; normals[z] = 
cylinderNormals[z]; uvs[z] = cylinderUvs[z]; - colors[z] = { cylinderGlColor[0], cylinderGlColor[1], cylinderGlColor[2], cylinderGlColor[3] }; } else { @@ -1001,7 +886,6 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( positions[z] = conePositions[cone_i]; normals[z] = coneNormals[cone_i]; uvs[z] = { 0, 0 }; - colors[z] = { coneGlColor[0], coneGlColor[1], coneGlColor[2], coneGlColor[3] }; } } From f0b50642ad95492ba1490ce5884c0d2774edf311 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 28 Jun 2025 21:57:27 +0700 Subject: [PATCH 05/40] Fix indentation --- src/nbl/asset/utils/CGeometryCreator.cpp | 430 +++++++++++------------ 1 file changed, 215 insertions(+), 215 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index c012df8826..88e93e4d2c 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -204,7 +204,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float polyCountY = 2; const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level - const size_t vertexCount = (polyCountXPitch * polyCountY) + 2; + const size_t vertexCount = (polyCountXPitch * polyCountY) + 2; auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); @@ -212,11 +212,11 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // Create indices using index_t = uint32_t; { - const auto indexCount = (polyCountX * polyCountY) * 6; + const auto indexCount = (polyCountX * polyCountY) * 6; const auto bytesize = sizeof(index_t) * indexCount; auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); auto indexPtr = reinterpret_cast(indices->getPointer()); - uint32_t level = 0; + uint32_t level = 0; size_t indexAddIx = 0; for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1) { @@ -293,7 +293,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // Create vertex attributes with NONE 
usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; + hlsl::vector* normals; hlsl::vector* uvs; { { @@ -311,9 +311,9 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float .rangeFormat = IGeometryBase::EAABBFormat::F32 }, .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), } }); } @@ -332,9 +332,9 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM }, .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) } }); } @@ -353,9 +353,9 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM }, .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), } }); } @@ -373,68 +373,68 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float double ay = 0;//AngleY / 2; auto vertex_i = 0; - for (uint32_t y = 0; y < polyCountY; ++y) - { - ay += AngleY; - const double sinay = sin(ay); - axz = 0; + for (uint32_t y = 0; y < polyCountY; ++y) + { + ay += AngleY; + const double sinay = sin(ay); + axz = 0; - // calculate the necessary vertices without the doubled one + // calculate the necessary vertices without the doubled one const auto old_vertex_i = vertex_i; - for (uint32_t xz = 0; xz < polyCountX; ++xz) - { - // calculate points position - - float32_t3 pos(static_cast(cos(axz) * sinay), - static_cast(cos(ay)), - static_cast(sin(axz) * sinay)); - // for spheres the normal is the position - core::vectorSIMDf normal(&pos.x); - normal.makeSafe3D(); - const auto quantizedNormal = quantNormalCache->quantize(normal); - pos *= radius; - - // calculate texture coordinates via sphere mapping - // tu is the same on each level, so only 
calculate once - float tu = 0.5f; - //if (y==0) - //{ - if (normal.Y != -1.0f && normal.Y != 1.0f) - tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); - if (normal.Z < 0.0f) - tu = 1 - tu; - //} - //else - //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; + for (uint32_t xz = 0; xz < polyCountX; ++xz) + { + // calculate points position + + float32_t3 pos(static_cast(cos(axz) * sinay), + static_cast(cos(ay)), + static_cast(sin(axz) * sinay)); + // for spheres the normal is the position + core::vectorSIMDf normal(&pos.x); + normal.makeSafe3D(); + const auto quantizedNormal = quantNormalCache->quantize(normal); + pos *= radius; + + // calculate texture coordinates via sphere mapping + // tu is the same on each level, so only calculate once + float tu = 0.5f; + //if (y==0) + //{ + if (normal.Y != -1.0f && normal.Y != 1.0f) + tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); + if (normal.Z < 0.0f) + tu = 1 - tu; + //} + //else + //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; positions[vertex_i] = pos; uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast(ay * core::RECIPROCAL_PI())) }; memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); vertex_i++; - axz += AngleX; - } - // This is the doubled vertex on the initial position + axz += AngleX; + } + // This is the doubled vertex on the initial position - positions[vertex_i] = positions[old_vertex_i]; + positions[vertex_i] = positions[old_vertex_i]; uvs[vertex_i] = { 127, uvs[old_vertex_i].y }; normals[vertex_i] = normals[old_vertex_i]; vertex_i++; - } + } // the vertex at the top of the sphere positions[vertex_i] = { 0.f, radius, 0.f }; uvs[vertex_i] = { 0, 63}; const auto quantizedTopNormal = quantNormalCache->quantize(core::vectorSIMDf(0.f, 1.f, 0.f)); - memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); + memcpy(normals + vertex_i, &quantizedTopNormal, 
sizeof(quantizedTopNormal)); // the vertex at the bottom of the sphere vertex_i++; positions[vertex_i] = { 0.f, -radius, 0.f }; uvs[vertex_i] = { 63, 127}; const auto quantizedBottomNormal = quantNormalCache->quantize(core::vectorSIMDf(0.f, -1.f, 0.f)); - memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); + memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); @@ -449,8 +449,8 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; - const uint16_t halfIx = static_cast(tesselation); - const uint16_t vertexCount = 2 * static_cast(tesselation); + const uint16_t halfIx = static_cast(tesselation); + const uint16_t vertexCount = 2 * static_cast(tesselation); auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); @@ -458,20 +458,20 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( // Create indices using index_t = uint16_t; { - constexpr uint32_t RowCount = 2u; - const auto IndexCount = RowCount * 3 * tesselation; + constexpr uint32_t RowCount = 2u; + const auto IndexCount = RowCount * 3 * tesselation; const auto bytesize = sizeof(index_t) * IndexCount; auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); auto u = reinterpret_cast(indices->getPointer()); - for (uint16_t i = 0u, j = 0u; i < halfIx; ++i) - { - u[j++] = i; - u[j++] = (i + 1u) != halfIx ? (i + 1u):0u; - u[j++] = i + halfIx; - u[j++] = i + halfIx; - u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u; - u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx; - } + for (uint16_t i = 0u, j = 0u; i < halfIx; ++i) + { + u[j++] = i; + u[j++] = (i + 1u) != halfIx ? (i + 1u):0u; + u[j++] = i + halfIx; + u[j++] = i + halfIx; + u[j++] = (i + 1u)!= halfIx ? 
(i + 1u):0u; + u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx; + } shapes::AABB<4,index_t> aabb; aabb.minVx[0] = 0; @@ -492,7 +492,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; + hlsl::vector* normals; hlsl::vector* uvs; { { @@ -510,9 +510,9 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( .rangeFormat = IGeometryBase::EAABBFormat::F32 }, .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), } }); } @@ -531,9 +531,9 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM }, .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) } }); } @@ -552,46 +552,46 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM }, .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), } }); } } - const float tesselationRec = core::reciprocal_approxim(static_cast(tesselation)); - const float step = 2.f * core::PI() * tesselationRec; - for (uint32_t i = 0u; i < tesselation; ++i) - { + const float tesselationRec = core::reciprocal_approxim(static_cast(tesselation)); + const float step = 2.f * core::PI() * tesselationRec; + for (uint32_t i = 0u; i < tesselation; ++i) + { const auto f_i = static_cast(i); - core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f); - p *= radius; - const auto n = quantNormalCache->quantize(core::normalize(p)); + core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f); + p *= radius; + const auto n = quantNormalCache->quantize(core::normalize(p)); - positions[i] = { p.x, 
p.y, p.z }; + positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); uvs[i] = { f_i * tesselationRec, 0.0 }; - positions[i + halfIx] = { p.x, p.y, length }; - normals[i + halfIx] = normals[i]; - uvs[i + halfIx] = { 1.0f, 0.0f }; - } + positions[i + halfIx] = { p.x, p.y, length }; + normals[i + halfIx] = normals[i]; + uvs[i + halfIx] = { 1.0f, 0.0f }; + } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } core::smart_refctd_ptr CGeometryCreator::createCone( - float radius, float length, uint32_t tesselation, - float oblique, CQuantNormalCache* const quantNormalCacheOverride) const + float radius, float length, uint32_t tesselation, + float oblique, CQuantNormalCache* const quantNormalCacheOverride) const { using namespace hlsl; CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; - const uint16_t vertexCount = 2 * static_cast(tesselation); + const uint16_t vertexCount = 2 * static_cast(tesselation); auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); @@ -599,19 +599,19 @@ core::smart_refctd_ptr CGeometryCreator::createCone( // Create indices using index_t = uint16_t; { - constexpr uint32_t RowCount = 2u; - const auto IndexCount = 3 * tesselation; + constexpr uint32_t RowCount = 2u; + const auto IndexCount = 3 * tesselation; const auto bytesize = sizeof(index_t) * IndexCount; auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); auto u = reinterpret_cast(indices->getPointer()); - const uint32_t firstIndexOfBaseVertices = 0; - const uint32_t firstIndexOfApexVertices = tesselation; - for (uint32_t i = 0; i < tesselation; i++) - { - u[i * 3] = firstIndexOfApexVertices + i; - u[(i * 3) + 1] = firstIndexOfBaseVertices + i; - u[(i * 3) + 2] = i == (tesselation - 1) ? 
firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; - } + const uint32_t firstIndexOfBaseVertices = 0; + const uint32_t firstIndexOfApexVertices = tesselation; + for (uint32_t i = 0; i < tesselation; i++) + { + u[i * 3] = firstIndexOfApexVertices + i; + u[(i * 3) + 1] = firstIndexOfBaseVertices + i; + u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + } shapes::AABB<4,index_t> aabb; aabb.minVx[0] = 0; @@ -632,53 +632,53 @@ core::smart_refctd_ptr CGeometryCreator::createCone( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; - { - { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - positions = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, float32_t> aabb; - aabb.maxVx = float32_t4(radius, radius, length, 0.0f); - aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } - { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - normals = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); - aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) - } - }); - } - } - - const float step = (2.f*core::PI()) / tesselation; + hlsl::vector* normals; + { + { + constexpr auto 
AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } + } + + const float step = (2.f*core::PI()) / tesselation; const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f); @@ -728,14 +728,14 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( const float width1 ) const { - assert(height > cylinderHeight); + assert(height > cylinderHeight); using position_t = hlsl::float32_t3; using normal_t = hlsl::vector; using uv_t = hlsl::vector; - auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); - auto cone = createCone(width1, height-cylinderHeight, tesselationCone); + auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); + auto cone = createCone(width1, height-cylinderHeight, tesselationCone); auto cylinderPositions = reinterpret_cast(cylinder->getPositionView().src.buffer->getPointer()); auto conePositions = 
reinterpret_cast(cone->getPositionView().src.buffer->getPointer()); @@ -794,52 +794,52 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; + hlsl::vector* normals; hlsl::vector* uvs; - { - { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); - positions = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, float32_t> aabb; + { + { + constexpr auto AttrSize = sizeof(decltype(*positions)); + auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); + positions = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, float32_t> aabb; //TODO(kevyuu): Calculate arrow aabb - aabb.maxVx = hlsl::vector(127,127,127,0); - aabb.minVx = -aabb.maxVx; - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } - { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); - normals = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); - aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) - } - }); - } + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setPositionView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + 
.rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = { + .offset=0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }); + } + { + constexpr auto AttrSize = sizeof(decltype(*normals)); + auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); + normals = reinterpret_cast(buff->getPointer()); + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = NormalFormat, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff) + } + }); + } { constexpr auto AttrSize = sizeof(decltype(*uvs)); auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); @@ -855,21 +855,21 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM }, .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), } }); } - } - + } + for (auto i = 0ull; i < coneVertexCount; ++i) { auto& conePosition = conePositions[i]; core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z); newPos.rotateYZByRAD(-1.5707963268); - conePosition = {newPos.x, newPos.y, newPos.z}; + conePosition = {newPos.x, newPos.y, newPos.z}; } for (auto z = 0ull; z < newArrowVertexCount; ++z) @@ -1100,8 +1100,8 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f and create icopshere's vertices and indecies. Polyhedron subdividing icosahedron (20 tris) by N-times iteration - The icosphere with N=1 (default) has 80 triangles by subdividing a triangle - of icosahedron into 4 triangles. If N=0, it is identical to icosahedron. + The icosphere with N=1 (default) has 80 triangles by subdividing a triangle + of icosahedron into 4 triangles. 
If N=0, it is identical to icosahedron. */ class Icosphere @@ -1240,14 +1240,14 @@ class Icosphere texture coordinate is shared or no. If it is on the line segments, it is also non-shared point - 00 01 02 03 04 - /\ /\ /\ /\ /\ - / \/ \/ \/ \/ \ + 00 01 02 03 04 + /\ /\ /\ /\ /\ + / \/ \/ \/ \/ \ 05 06 07 08 09 \ - \ 10 11 12 13 14 + \ 10 11 12 13 14 \ /\ /\ /\ /\ / \/ \/ \/ \/ \/ - 15 16 17 18 19 + 15 16 17 18 19 */ static inline bool isSharedTexCoord(const float t[2]) @@ -1813,7 +1813,7 @@ class Icosphere v1 / \ newV1 *---* newV3 - / \ / \ + / \ / \ v2---*---v3 newV2 */ @@ -1979,8 +1979,8 @@ class Icosphere add 7 sub edge lines per triangle to array using 6 indices (CCW) i1 / : (i1, i2) - i2---i6 : (i2, i6) - / \ / : (i2, i3), (i2, i4), (i6, i4) + i2---i6 : (i2, i6) + / \ / : (i2, i3), (i2, i4), (i6, i4) i3---i4---i5 : (i3, i4), (i4, i5) */ @@ -2073,8 +2073,8 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl // Create indices { - auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() }); - memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize()); + auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() }); + memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize()); shapes::AABB<4,Icosphere::index_t> aabb; aabb.minVx[0] = 0; @@ -2118,7 +2118,7 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl } }); } - { + { using normal_t = float32_t3; constexpr auto AttrSize = sizeof(normal_t); auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE}); @@ -2136,8 +2136,8 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl }, .src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)}, }); - } - { + } + { using uv_t = uint32_t; constexpr auto AttrSize = sizeof(uv_t); auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE}); @@ -2160,7 +2160,7 @@ core::smart_refctd_ptr 
CGeometryCreator::createIcoSphere(fl const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] }; uvs[uv_i] = packUnorm2x16(f32_uv); } - } + } } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); From 68a689cbbaefb3210a5715940fae0d82f38f47b7 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 29 Jun 2025 15:21:20 +0700 Subject: [PATCH 06/40] Fix normal and uv type --- src/nbl/asset/utils/CGeometryCreator.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 88e93e4d2c..0a5dd5920e 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -210,8 +210,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float retval->setIndexing(IPolygonGeometryBase::TriangleList()); // Create indices - using index_t = uint32_t; { + using index_t = uint32_t; const auto indexCount = (polyCountX * polyCountY) * 6; const auto bytesize = sizeof(index_t) * indexCount; auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); @@ -293,8 +293,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; - hlsl::vector* uvs; + hlsl::vector* normals; + hlsl::vector* uvs; { { constexpr auto AttrSize = sizeof(decltype(*positions)); @@ -342,12 +342,12 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float constexpr auto AttrSize = sizeof(decltype(*uvs)); auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; + shapes::AABB<4, uint16_t> aabb; aabb.minVx = hlsl::vector(0,0,0,0); aabb.maxVx = hlsl::vector(255,255,0,0); retval->getAuxAttributeViews()->push_back({ .composed = { - .encodedDataRange = {.u8=aabb}, + .encodedDataRange 
= {.u16=aabb}, .stride = AttrSize, .format = EF_R8G8_UNORM, .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM @@ -492,8 +492,8 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; - hlsl::vector* uvs; + hlsl::vector* normals; + hlsl::vector* uvs; { { constexpr auto AttrSize = sizeof(decltype(*positions)); @@ -632,7 +632,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; + hlsl::vector* normals; { { constexpr auto AttrSize = sizeof(decltype(*positions)); From ca7f1822540a71b4ddfa8ce5d4b38d5bd481a3c1 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sun, 29 Jun 2025 15:21:51 +0700 Subject: [PATCH 07/40] Return nullptr if vertexCount overflow --- src/nbl/asset/utils/CGeometryCreator.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 0a5dd5920e..fcd31041c4 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -449,8 +449,11 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; - const uint16_t halfIx = static_cast(tesselation); - const uint16_t vertexCount = 2 * static_cast(tesselation); + const auto halfIx = static_cast(tesselation); + const uint32_t u32_vertexCount = 2 * tesselation; + if (u32_vertexCount > std::numeric_limits::max()) + return nullptr; + const auto vertexCount = static_cast(u32_vertexCount); auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); @@ -571,11 +574,11 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); - uvs[i] = { f_i * tesselationRec, 0.0 }; + uvs[i] = { packSnorm(f_i * tesselationRec), packSnorm(0.0) }; positions[i + halfIx] = { p.x, p.y, length }; normals[i + halfIx] = normals[i]; - uvs[i + halfIx] = { 1.0f, 0.0f }; + uvs[i + halfIx] = { packSnorm(1.0f), packSnorm(0.0f) }; } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); @@ -591,7 +594,10 @@ core::smart_refctd_ptr CGeometryCreator::createCone( CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; - const uint16_t vertexCount = 2 * static_cast(tesselation); + const uint32_t u32_vertexCount = 2 * tesselation; + if (u32_vertexCount > std::numeric_limits::max()) + return nullptr; + const auto vertexCount = static_cast(u32_vertexCount); auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); From a2b7b04a9540fb2ca38620c24744174881062d35 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 1 Jul 2025 18:09:08 +0700 Subject: [PATCH 08/40] Remove simd vector from normal quantization cache --- include/nbl/asset/utils/CDirQuantCacheBase.h | 111 ++++++++++--------- include/nbl/asset/utils/CQuantNormalCache.h | 5 +- 2 files changed, 58 insertions(+), 58 deletions(-) diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h index 462d414a73..a0e656d50e 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -43,13 +43,13 @@ class CDirQuantCacheBase Vector8u3() : x(0u),y(0u),z(0u) {} Vector8u3(const Vector8u3&) = default; - explicit Vector8u3(const core::vectorSIMDu32& val) + explicit Vector8u3(const hlsl::float32_t3& val) { operator=(val); } Vector8u3& operator=(const Vector8u3&) = default; - Vector8u3& operator=(const core::vectorSIMDu32& val) + Vector8u3& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; @@ -57,11 +57,12 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z); + return { x, y, z }; } + private: uint8_t x; uint8_t y; @@ -74,24 +75,24 @@ class CDirQuantCacheBase Vector8u4() : x(0u),y(0u),z(0u),w(0u) {} Vector8u4(const Vector8u4&) = default; - explicit Vector8u4(const core::vectorSIMDu32& val) + explicit Vector8u4(const hlsl::float32_t3& val) { operator=(val); } Vector8u4& operator=(const Vector8u4&) = default; - Vector8u4& 
operator=(const core::vectorSIMDu32& val) + Vector8u4& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; z = val.z; - w = val.w; + w = 0; return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z,w); + return { x, y, z }; } private: @@ -108,16 +109,17 @@ class CDirQuantCacheBase Vector1010102() : storage(0u) {} Vector1010102(const Vector1010102&) = default; - explicit Vector1010102(const core::vectorSIMDu32& val) + explicit Vector1010102(const hlsl::float32_t3& val) { operator=(val); } Vector1010102& operator=(const Vector1010102&) = default; - Vector1010102& operator=(const core::vectorSIMDu32& val) + Vector1010102& operator=(const hlsl::float32_t3& val) { - constexpr auto storageBits = quantizationBits+1u; - storage = val.x|(val.y<>storageBits,storage>>(storageBits*2u))&mask; + constexpr auto storageBits = quantizationBits + 1u; + const auto mask = (0x1u << storageBits) - 1u; + return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask}; } - + private: uint32_t storage; }; @@ -149,13 +151,13 @@ class CDirQuantCacheBase Vector16u3() : x(0u),y(0u),z(0u) {} Vector16u3(const Vector16u3&) = default; - explicit Vector16u3(const core::vectorSIMDu32& val) + explicit Vector16u3(const hlsl::float32_t3& val) { operator=(val); } Vector16u3& operator=(const Vector16u3&) = default; - Vector16u3& operator=(const core::vectorSIMDu32& val) + Vector16u3& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; @@ -163,11 +165,11 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z); + return { x, y, z }; } - + private: uint16_t x; uint16_t y; @@ -180,26 +182,26 @@ class CDirQuantCacheBase Vector16u4() : x(0u),y(0u),z(0u),w(0u) {} Vector16u4(const Vector16u4&) = default; - explicit Vector16u4(const core::vectorSIMDu32& val) + explicit 
Vector16u4(const hlsl::float32_t3& val) { operator=(val); } Vector16u4& operator=(const Vector16u4&) = default; - Vector16u4& operator=(const core::vectorSIMDu32& val) + Vector16u4& operator=(const hlsl::float32_t3& val) { x = val.x; y = val.y; z = val.z; - w = val.w; + w = 0; return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::float32_t3 getValue() const { - return core::vectorSIMDu32(x,y,z,w); + return { x, y, z }; } - + private: uint16_t x; uint16_t y; @@ -377,11 +379,11 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: std::tuple...> cache; template - value_type_t quantize(const core::vectorSIMDf& value) + value_type_t quantize(const hlsl::float32_t3& value) { - const auto negativeMask = value < core::vectorSIMDf(0.0f); + const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f)); - const core::vectorSIMDf absValue = abs(value); + const hlsl::float32_t3 absValue = abs(value); const auto key = Key(absValue); constexpr auto quantizationBits = quantization_bits_v; @@ -393,32 +395,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: quantized = found->second; else { - const core::vectorSIMDf fit = findBestFit(absValue); + const auto fit = findBestFit(absValue); - quantized = core::vectorSIMDu32(core::abs(fit)); + quantized = abs(fit); insertIntoCache(key,quantized); } } - const core::vectorSIMDu32 xorflag((0x1u<<(quantizationBits+1u))-1u); - auto restoredAsVec = quantized.getValue()^core::mix(core::vectorSIMDu32(0u),xorflag,negativeMask); - restoredAsVec += core::mix(core::vectorSIMDu32(0u),core::vectorSIMDu32(1u),negativeMask); - return value_type_t(restoredAsVec&xorflag); + //return quantized. + const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? 
-1 : 1); + return value_type_t(negativeMulVec * quantized.getValue()); } template - static inline core::vectorSIMDf findBestFit(const core::vectorSIMDf& value) + static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value) { static_assert(dimensions>1u,"No point"); static_assert(dimensions<=4u,"High Dimensions are Hard!"); - // precise normalize - const auto vectorForDots = value.preciseDivision(length(value)); + + const auto vectorForDots = hlsl::normalize(value); // - core::vectorSIMDf fittingVector; - core::vectorSIMDf floorOffset; + hlsl::float32_t3 fittingVector; + hlsl::float32_t3 floorOffset; constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u; - core::vectorSIMDf corners[cornerCount] = {}; + hlsl::float32_t3 corners[cornerCount] = {}; { uint32_t maxDirCompIndex = 0u; for (auto i=1u; i void + auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void { - auto newFitLen = core::length(newFit); - const float dp = core::dot(newFit,vectorForDots).preciseDivision(newFitLen)[0]; + auto newFitLen = length(newFit); + const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen); if (dp > closestTo1) { closestTo1 = dp; @@ -466,18 +467,18 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: }; constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u; - const core::vectorSIMDf cubeHalfSizeND = core::vectorSIMDf(cubeHalfSize); + const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize); for (uint32_t n=cubeHalfSize; n>0u; n--) { //we'd use float addition in the interest of speed, to increment the loop //but adding a small number to a large one loses precision, so multiplication preferrable - core::vectorSIMDf bottomFit = core::floor(fittingVector*float(n)+floorOffset); - if ((bottomFit<=cubeHalfSizeND).all()) + const auto bottomFit = floor(fittingVector * float(n) + floorOffset); + if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND))) evaluateFit(bottomFit); - for (auto i=0u; i - 
value_type_t quantize(core::vectorSIMDf normal) + value_type_t quantize(hlsl::float32_t3 normal) { - normal.makeSafe3D(); return Base::quantize<3u,CacheFormat>(normal); } }; From 83f39d31c869f84049fb58e839c014f3a857c03e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 1 Jul 2025 18:55:29 +0700 Subject: [PATCH 09/40] Fix SBuferRange to SBufferBinding conversion requirement --- include/nbl/asset/IBuffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h index 8c3b8f95ef..6f8c1bb35b 100644 --- a/include/nbl/asset/IBuffer.h +++ b/include/nbl/asset/IBuffer.h @@ -100,7 +100,7 @@ struct SBufferRange inline operator SBufferRange&() {return *reinterpret_cast*>(this);} inline operator const SBufferRange&() const {return *reinterpret_cast*>(this);} - template requires std::is_same_v,BufferType> + template requires (std::is_const_v && std::is_base_of_v>) inline operator SBufferBinding() const { return {.offset=offset,.buffer=buffer}; } explicit inline operator bool() const {return isValid();} From 983ace987435d1a9349d282c64600b93cb8eef8e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 1 Jul 2025 21:25:46 +0700 Subject: [PATCH 10/40] createArrow multiple geometries --- include/nbl/asset/utils/CGeometryCreator.h | 2 +- src/nbl/asset/utils/CGeometryCreator.cpp | 186 +++------------------ 2 files changed, 23 insertions(+), 165 deletions(-) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index ca6fff1790..1852b0f033 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ b/include/nbl/asset/utils/CGeometryCreator.h @@ -58,7 +58,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param colorCone color of the cone \return Generated mesh. 
*/ - core::smart_refctd_ptr createArrow(const uint32_t tesselationCylinder = 4, + core::vector> createArrow(const uint32_t tesselationCylinder = 4, const uint32_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, const float widthCone = 0.3f) const; diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index fcd31041c4..31ddbac88c 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -389,8 +389,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float static_cast(cos(ay)), static_cast(sin(axz) * sinay)); // for spheres the normal is the position - core::vectorSIMDf normal(&pos.x); - normal.makeSafe3D(); + const auto normal = pos; const auto quantizedNormal = quantNormalCache->quantize(normal); pos *= radius; @@ -399,9 +398,9 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float float tu = 0.5f; //if (y==0) //{ - if (normal.Y != -1.0f && normal.Y != 1.0f) - tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); - if (normal.Z < 0.0f) + if (normal.y != -1.0f && normal.y != 1.0f) + tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); + if (normal.z < 0.0f) tu = 1 - tu; //} //else @@ -426,14 +425,14 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // the vertex at the top of the sphere positions[vertex_i] = { 0.f, radius, 0.f }; uvs[vertex_i] = { 0, 63}; - const auto quantizedTopNormal = quantNormalCache->quantize(core::vectorSIMDf(0.f, 1.f, 0.f)); + const auto quantizedTopNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, 1.f, 0.f)); memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); // the vertex at the bottom of the sphere vertex_i++; positions[vertex_i] = { 0.f, -radius, 0.f }; uvs[vertex_i] = { 63, 127}; - const auto quantizedBottomNormal = 
quantNormalCache->quantize(core::vectorSIMDf(0.f, -1.f, 0.f)); + const auto quantizedBottomNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, -1.f, 0.f)); memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } @@ -568,9 +567,9 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( for (uint32_t i = 0u; i < tesselation; ++i) { const auto f_i = static_cast(i); - core::vectorSIMDf p(std::cos(f_i * step), std::sin(f_i * step), 0.f); + hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f); p *= radius; - const auto n = quantNormalCache->quantize(core::normalize(p)); + const auto n = quantNormalCache->quantize(hlsl::normalize(p)); positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); @@ -686,38 +685,38 @@ core::smart_refctd_ptr CGeometryCreator::createCone( const float step = (2.f*core::PI()) / tesselation; - const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f); + const hlsl::float32_t3 apexVertexCoords(oblique, length, 0.0f); const auto apexVertexBase_i = tesselation; for (uint32_t i = 0u; i < tesselation; i++) { - core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f); + hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step)); v *= radius; positions[i] = { v.x, v.y, v.z }; positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z }; - const auto simdPosition = core::vectorSIMDf(positions[i].x, positions[i].y, positions[i].z); - const core::vectorSIMDf v0ToApex = apexVertexCoords - simdPosition; + const auto simdPosition = hlsl::float32_t3(positions[i].x, positions[i].y, positions[i].z); + const hlsl::float32_t3 v0ToApex = apexVertexCoords - simdPosition; uint32_t nextVertexIndex = i == (tesselation - 1) ? 
0 : i + 1; - core::vectorSIMDf u1 = core::vectorSIMDf(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z); + hlsl::float32_t3 u1 = hlsl::float32_t3(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z); u1 -= simdPosition; - float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x); - u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight; + float angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u1))); + u1 = hlsl::normalize(hlsl::cross(v0ToApex, u1)) * angleWeight; uint32_t prevVertexIndex = i == 0 ? (tesselation - 1) : i - 1; - core::vectorSIMDf u2 = core::vectorSIMDf(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z); + hlsl::float32_t3 u2 = hlsl::float32_t3(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z); u2 -= simdPosition; - angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x); - u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight; + angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u2))); + u2 = hlsl::normalize(hlsl::cross(u2, v0ToApex)) * angleWeight; - const auto baseNormal = quantNormalCache->quantize(core::normalize(u1 + u2)); + const auto baseNormal = quantNormalCache->quantize(hlsl::normalize(u1 + u2)); memcpy(normals + i, &baseNormal, sizeof(baseNormal)); - const auto apexNormal = quantNormalCache->quantize(core::normalize(u1)); + const auto apexNormal = quantNormalCache->quantize(hlsl::normalize(u1)); memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal)); } @@ -725,7 +724,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( return retval; } -core::smart_refctd_ptr CGeometryCreator::createArrow( +core::vector> CGeometryCreator::createArrow( const uint32_t tesselationCylinder, const uint32_t tesselationCone, 
const float height, @@ -737,137 +736,13 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( assert(height > cylinderHeight); using position_t = hlsl::float32_t3; - using normal_t = hlsl::vector; - using uv_t = hlsl::vector; auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); auto cone = createCone(width1, height-cylinderHeight, tesselationCone); - auto cylinderPositions = reinterpret_cast(cylinder->getPositionView().src.buffer->getPointer()); auto conePositions = reinterpret_cast(cone->getPositionView().src.buffer->getPointer()); - const auto cylinderNormals = reinterpret_cast(cylinder->getNormalView().src.buffer->getPointer()); - const auto coneNormals = reinterpret_cast(cone->getNormalView().src.buffer->getPointer()); - - const auto cylinderUvs = reinterpret_cast(cylinder->getAuxAttributeViews()->front().src.buffer->getPointer()); - const auto coneUvs = reinterpret_cast(cone->getAuxAttributeViews()->front().src.buffer->getPointer()); - - const auto cylinderIndices = cylinder->getIndexView().src.buffer->getPointer(); - const auto coneIndices = cone->getIndexView().src.buffer->getPointer(); - - const auto cylinderVertexCount = cylinder->getPositionView().getElementCount(); const auto coneVertexCount = cone->getPositionView().getElementCount(); - const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount; - - const auto cylinderIndexCount = cylinder->getVertexReferenceCount(); - const auto coneIndexCount = cone->getVertexReferenceCount(); - const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount; - - using namespace hlsl; - - auto retval = core::make_smart_refctd_ptr(); - retval->setIndexing(IPolygonGeometryBase::TriangleList()); - - // Create indices - using index_t = uint16_t; - { - const auto bytesize = sizeof(index_t) * newArrowIndexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - auto arrowIndices = reinterpret_cast(indices->getPointer()); - auto newConeIndices = 
(arrowIndices + cylinderIndexCount); - - memcpy(arrowIndices, cylinderIndices, sizeof(uint16_t) * cylinderIndexCount); - memcpy(newConeIndices, coneIndices, sizeof(uint16_t) * coneIndexCount); - - for (auto i = 0ull; i < coneIndexCount; ++i) - *(newConeIndices + i) += cylinderVertexCount; - - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = newArrowVertexCount - 1; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} - }); - } - - constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; - - // Create vertex attributes with NONE usage because we have no clue how they'll be used - hlsl::float32_t3* positions; - hlsl::vector* normals; - hlsl::vector* uvs; - { - { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); - positions = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, float32_t> aabb; - //TODO(kevyuu): Calculate arrow aabb - aabb.maxVx = hlsl::vector(127,127,127,0); - aabb.minVx = -aabb.maxVx; - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } - { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); - normals = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); - aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = { - 
.offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) - } - }); - } - { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({{AttrSize * newArrowVertexCount,IBuffer::EUF_NONE}}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); - } - } for (auto i = 0ull; i < coneVertexCount; ++i) { @@ -878,25 +753,8 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( conePosition = {newPos.x, newPos.y, newPos.z}; } - for (auto z = 0ull; z < newArrowVertexCount; ++z) - { - if (z < cylinderVertexCount) - { - positions[z] = cylinderPositions[z]; - normals[z] = cylinderNormals[z]; - uvs[z] = cylinderUvs[z]; - } - else - { - const auto cone_i = z - cylinderVertexCount; - positions[z] = conePositions[cone_i]; - normals[z] = coneNormals[cone_i]; - uvs[z] = { 0, 0 }; - } - } + return {cylinder, cone}; - CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); - return retval; } core::smart_refctd_ptr CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const From 090dae2ac53cb2122fdf33cdc51962053a89ac39 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 2 Jul 2025 15:42:07 +0700 Subject: [PATCH 11/40] DRY findLSB --- include/nbl/asset/ECommonEnums.h | 287 +------------------------------ src/nbl/asset/ECommonEnums.cpp | 281 ++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+), 283 deletions(-) create mode 100644 src/nbl/asset/ECommonEnums.cpp diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h index c07a0ced6a..f830b270d3 100644 --- a/include/nbl/asset/ECommonEnums.h +++ 
b/include/nbl/asset/ECommonEnums.h @@ -185,292 +185,13 @@ struct SMemoryBarrier } }; -inline core::bitflag allPreviousStages(core::bitflag stages) -{ - struct PerStagePreviousStages - { - public: - constexpr PerStagePreviousStages() - { - // set all stage to have itself as their previous stages - for (auto i = 0; i < std::numeric_limits::digits; i++) - data[i] = static_cast(i); - - add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - // graphics primitive pipeline - PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT; - for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT}) - { - if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) - primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT; - add(pipelineStage, primitivePrevStage); - primitivePrevStage |= pipelineStage; - } - - - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] |= previousStageFlags; - } - - PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; - }; - - constexpr PerStagePreviousStages bitToAccess = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = 
hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allLaterStages(core::bitflag stages) -{ - struct PerStageLaterStages - { - public: - constexpr PerStageLaterStages() - { - // set all stage to have itself as their next stages - for (auto i = 0; i < std::numeric_limits::digits; i++) - data[i] = static_cast(i); - - add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT); - add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); - - // graphics primitive pipeline - PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE; - const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT }; - for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) - { - const auto pipelineStage = *iter; - add(pipelineStage, laterStage); - laterStage |= pipelineStage; - } - - add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] |= laterStageFlags; - } - - PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; - }; - - constexpr PerStageLaterStages bitToAccess = {}; - - core::bitflag retval 
= PIPELINE_STAGE_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allAccessesFromStages(core::bitflag stages) -{ - struct PerStageAccesses - { - public: - constexpr PerStageAccesses() - { - init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT); - - constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW); - init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT); - - constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT; -// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly? - - constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW); - init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW); - - init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT); - init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT); - init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT); - - constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS; - constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); - init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT); - 
init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT); - init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW); -// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW); -// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT); - init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT); - constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW); - init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT); - init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW); - init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); - - init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); - - init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW); - init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW); - -// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT); -// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT); -// init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} +core::bitflag allPreviousStages(core::bitflag stages); - 
private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] = accessFlags; - } +core::bitflag allLaterStages(core::bitflag stages); - ACCESS_FLAGS data[32] = {}; - }; - constexpr PerStageAccesses bitToAccess = {}; +core::bitflag allAccessesFromStages(core::bitflag stages); - // TODO: add logically later or previous stages to make sure all other accesses remain valid - // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically) - - core::bitflag retval = ACCESS_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses) -{ - struct PerAccessStages - { - public: - constexpr PerAccessStages() - { - init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); - init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); - - init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); - init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); - - constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT; -// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds); -// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); - - constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; - constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; - init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders); - 
init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations); - - init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); - init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); - init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT); - init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders); - init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); - init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds); - init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders); - - init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT); - init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT); - - init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT); - init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT); - constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT; - init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests); - init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests); - init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT); - init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); - init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); - - init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); - -// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); -// 
init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); -// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); -// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); -// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); -// init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(accessFlags)); - data[bitIx] = stageFlags; - } - - PIPELINE_STAGE_FLAGS data[32] = {}; - }; - constexpr PerAccessStages bitToStage = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(accesses.value)) - { - const auto bitIx = hlsl::findLSB(accesses); - retval |= bitToStage[bitIx]; - accesses ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses); } diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp new file mode 100644 index 0000000000..0f23b9b3fc --- /dev/null +++ b/src/nbl/asset/ECommonEnums.cpp @@ -0,0 +1,281 @@ +#include "nbl/asset/ECommonEnums.h" + +namespace nbl::asset +{ + +constexpr static int32_t findLSB(size_t val) +{ + if constexpr(std::is_constant_evaluated()) + { + for (size_t ix=0ull; ix allPreviousStages(core::bitflag stages) +{ + struct PerStagePreviousStages + { + public: + constexpr PerStagePreviousStages() + { + // set all stage to have itself as their previous stages + for (auto i = 0; i < std::numeric_limits::digits; i++) + data[i] = static_cast(i); + + add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + // graphics primitive pipeline + PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT; + for 
(auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT}) + { + if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) + primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT; + add(pipelineStage, primitivePrevStage); + primitivePrevStage |= pipelineStage; + } + + + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags) + { + const auto bitIx = findLSB(static_cast(stageFlag)); + data[bitIx] |= previousStageFlags; + } + + PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; + }; + + constexpr PerStagePreviousStages bitToAccess = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allLaterStages(core::bitflag stages) +{ + struct PerStageLaterStages + { + public: + constexpr PerStageLaterStages() + { + // set all stage to have itself as their next stages + for (auto i = 0; i < std::numeric_limits::digits; i++) + data[i] = static_cast(i); + + add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT); + add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); + + // graphics primitive pipeline + PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE; + const auto 
graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT }; + for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) + { + const auto pipelineStage = *iter; + add(pipelineStage, laterStage); + laterStage |= pipelineStage; + } + + add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags) + { + const auto bitIx = findLSB(static_cast(stageFlag)); + data[bitIx] |= laterStageFlags; + } + + PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; + }; + + constexpr PerStageLaterStages bitToAccess = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allAccessesFromStages(core::bitflag stages) +{ + struct PerStageAccesses + { + public: + constexpr PerStageAccesses() + { + init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT); + + constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW); + 
init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT); + + constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT; +// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly? + + constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW); + init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW); + + init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT); + init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT); + init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT); + + constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS; + constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); + init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT); + init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT); + init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW); +// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW); +// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW); + 
init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT); + init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT); + constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW); + init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT); + init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW); + init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); + + init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); + + init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW); + init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW); + +// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT); +// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT); +// init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags) + { + const auto bitIx = findLSB(static_cast(stageFlag)); + data[bitIx] = accessFlags; + } + + ACCESS_FLAGS data[32] = {}; + }; + constexpr PerStageAccesses bitToAccess = {}; + + // TODO: add logically later or previous stages to make sure all other accesses remain valid + // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically) + + core::bitflag retval = ACCESS_FLAGS::NONE; + while (bool(stages.value)) + { + const 
auto bitIx = findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses) +{ + struct PerAccessStages + { + public: + constexpr PerAccessStages() + { + init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); + init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); + + init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); + init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); + + constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT; +// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds); +// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); + + constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; + init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders); + init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations); + + init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); + init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); + init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT); + init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders); + 
init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); + init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds); + init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders); + + init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT); + init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT); + + init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT); + init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT); + constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT; + init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests); + init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests); + init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT); + init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); + init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); + + init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); + +// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); +// init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); +// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); +// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); +// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); +// init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags) + { + const auto bitIx 
= findLSB(static_cast(accessFlags)); + data[bitIx] = stageFlags; + } + + PIPELINE_STAGE_FLAGS data[32] = {}; + }; + constexpr PerAccessStages bitToStage = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(accesses.value)) + { + const auto bitIx = findLSB(static_cast(accesses.value)); + retval |= bitToStage[bitIx]; + accesses ^= static_cast(0x1u< Date: Thu, 3 Jul 2025 21:03:10 +0700 Subject: [PATCH 12/40] Add missing ECommonEnums.cpp to CMakelists.txt --- src/nbl/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index cd9572daa5..7819ca830c 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -151,6 +151,7 @@ set(NBL_UI_SOURCES ) set(NBL_ASSET_SOURCES # Assets + ${NBL_ROOT_PATH}/src/nbl/asset/ECommonEnums.cpp ${NBL_ROOT_PATH}/src/nbl/asset/IAsset.cpp ${NBL_ROOT_PATH}/src/nbl/asset/IRenderpass.cpp ${NBL_ROOT_PATH}/src/nbl/asset/IAssetManager.cpp From d92b274d9979fe78f389c1e5b065e1bf739ded6b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 14 Jul 2025 11:46:08 +0700 Subject: [PATCH 13/40] Cpu ray tracing pipeline asset conversion --- include/nbl/asset/ICPURayTracingPipeline.h | 38 +- include/nbl/video/asset_traits.h | 16 + include/nbl/video/utilities/CAssetConverter.h | 2 + src/nbl/asset/utils/IShaderCompiler.cpp | 4 +- src/nbl/video/utilities/CAssetConverter.cpp | 364 ++++++++++++++---- 5 files changed, 332 insertions(+), 92 deletions(-) diff --git a/include/nbl/asset/ICPURayTracingPipeline.h b/include/nbl/asset/ICPURayTracingPipeline.h index 17c53557e1..2776939fad 100644 --- a/include/nbl/asset/ICPURayTracingPipeline.h +++ b/include/nbl/asset/ICPURayTracingPipeline.h @@ -25,14 +25,12 @@ class ICPURayTracingPipeline final : public ICPUPipeline intersections; }; - static core::smart_refctd_ptr create(const ICPUPipelineLayout* layout) + static core::smart_refctd_ptr create(ICPUPipelineLayout* layout) { auto retval = new ICPURayTracingPipeline(layout); return 
core::smart_refctd_ptr(retval,core::dont_grab); } - - constexpr static inline auto AssetType = ET_RAYTRACING_PIPELINE; inline E_TYPE getAssetType() const override { return AssetType; } @@ -83,12 +81,13 @@ class ICPURayTracingPipeline final : public ICPUPipelinevalid()) return false; if (m_raygen.valid() == SShaderSpecInfo::INVALID_SPEC_INFO) return false; + if (m_hitGroups.anyHits.size() != m_hitGroups.closestHits.size()) return false; + if (m_hitGroups.anyHits.size() != m_hitGroups.intersections.size()) return false; return true; } @@ -102,7 +101,23 @@ class ICPURayTracingPipeline final : public ICPUPipeline m_callables; - explicit ICPURayTracingPipeline(const ICPUPipelineLayout* layout) + explicit ICPURayTracingPipeline(ICPUPipelineLayout* layout) : base_t(layout, {}) {} inline void visitDependents_impl(std::function visit) const override { - if (!visit(m_raygen.shader.get()) return; - for (const auto& missInfo : self->m_misses) if (!visit(missInfo.shader.get())) return; - for (const auto& anyHitInfo : self->m_hitGroups.anyHits) if (!visit(anyHitInfo.shader.get())) return; - for (const auto& closestHitInfo : self->m_hitGroups.closestHits) if (!visit(closestHitInfo.shader.get())) return; - for (const auto& intersectionInfo : self->m_hitGroups.intersections) if (!visit(intersectionInfo.shader.get())) return; - for (const auto& callableInfo : self->m_callables) if(!visit(callableInfo.shader.get())) return; + if (!visit(m_layout.get())) return; + if (!visit(m_raygen.shader.get())) return; + for (const auto& missInfo : m_misses) if (!visit(missInfo.shader.get())) return; + for (const auto& anyHitInfo : m_hitGroups.anyHits) if (!visit(anyHitInfo.shader.get())) return; + for (const auto& closestHitInfo : m_hitGroups.closestHits) if (!visit(closestHitInfo.shader.get())) return; + for (const auto& intersectionInfo : m_hitGroups.intersections) if (!visit(intersectionInfo.shader.get())) return; + for (const auto& callableInfo : m_callables) 
if(!visit(callableInfo.shader.get())) return; } inline core::smart_refctd_ptr clone_impl(core::smart_refctd_ptr&& layout, uint32_t depth) const override final diff --git a/include/nbl/video/asset_traits.h b/include/nbl/video/asset_traits.h index faf5322798..c4a6c25ca5 100644 --- a/include/nbl/video/asset_traits.h +++ b/include/nbl/video/asset_traits.h @@ -21,6 +21,8 @@ #include "nbl/video/IGPUAccelerationStructure.h" #include "nbl/asset/ICPUPolygonGeometry.h" #include "nbl/video/IGPUPolygonGeometry.h" +#include "nbl/asset/ICPURayTracingPipeline.h" +#include "nbl/video/IGPURayTracingPipeline.h" namespace nbl::video @@ -244,6 +246,20 @@ struct asset_traits }; +template<> +struct asset_traits +{ + // the asset type + using asset_t = asset::ICPURayTracingPipeline; + // Depends on shader and layout + constexpr static inline bool HasChildren = true; + // the video type + using video_t = IGPURayTracingPipeline; + // lookup type + using lookup_t = const video_t*; +}; + + /* TODO template<> struct asset_traits; diff --git a/include/nbl/video/utilities/CAssetConverter.h b/include/nbl/video/utilities/CAssetConverter.h index 3f0225a78e..a360e3b0f5 100644 --- a/include/nbl/video/utilities/CAssetConverter.h +++ b/include/nbl/video/utilities/CAssetConverter.h @@ -48,6 +48,7 @@ class CAssetConverter : public core::IReferenceCounted asset::ICPUPipelineLayout, asset::ICPUPipelineCache, asset::ICPUComputePipeline, + asset::ICPURayTracingPipeline, asset::ICPURenderpass, asset::ICPUGraphicsPipeline, asset::ICPUDescriptorSet, @@ -690,6 +691,7 @@ class CAssetConverter : public core::IReferenceCounted bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); + bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); bool operator()(lookup_t); diff --git a/src/nbl/asset/utils/IShaderCompiler.cpp b/src/nbl/asset/utils/IShaderCompiler.cpp index 5cfd36eced..e60bf31b5c 100644 --- a/src/nbl/asset/utils/IShaderCompiler.cpp +++ 
b/src/nbl/asset/utils/IShaderCompiler.cpp @@ -342,7 +342,7 @@ core::smart_refctd_ptr IShaderCompiler::CCache::serialize() const memcpy(retVal.data() + SHADER_BUFFER_SIZE_BYTES + shaderBufferSize, dumpedContainerJson.data(), dumpedContainerJsonLength); auto memoryResource = core::make_smart_refctd_ptr>(std::move(retVal)); - return ICPUBuffer::create({ { retValSize }, memoryResource->getBacker().data(),std::move(memoryResource)}); + return ICPUBuffer::create({ { retValSize }, memoryResource->getBacker().data(),std::move(memoryResource)}, core::adopt_memory); } core::smart_refctd_ptr IShaderCompiler::CCache::deserialize(const std::span serializedCache) @@ -416,7 +416,7 @@ bool nbl::asset::IShaderCompiler::CCache::SEntry::setContent(const asset::ICPUBu compressedSpirv.resize(propsSize + destLen); auto memoryResource = core::make_smart_refctd_ptr>(std::move(compressedSpirv)); - spirv = ICPUBuffer::create({ { propsSize + destLen }, memoryResource->getBacker().data(),std::move(memoryResource)}); + spirv = ICPUBuffer::create({ { propsSize + destLen }, memoryResource->getBacker().data(),std::move(memoryResource)}, core::adopt_memory); return true; } diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d004660e42..3980a7a3a4 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -543,6 +543,31 @@ class AssetVisitor : public CRTP return false; return true; } + inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) + { + const auto* asset = instance.asset; + const auto* layout = asset->getLayout(); + if (!layout || !descend(layout,{layout})) + return false; + using stage_t = hlsl::ShaderStage; + for (stage_t stage : {hlsl::ShaderStage::ESS_RAYGEN, hlsl::ShaderStage::ESS_MISS, hlsl::ShaderStage::ESS_ANY_HIT, hlsl::ShaderStage::ESS_CLOSEST_HIT, hlsl::ShaderStage::ESS_INTERSECTION, hlsl::ShaderStage::ESS_CALLABLE}) + { + const auto& 
specInfos = asset->getSpecInfos(stage); + for (auto specInfo_i = 0; specInfo_i < specInfos.size(); specInfo_i++) + { + const auto& specInfo = specInfos[specInfo_i]; + const auto* shader = specInfo.shader.get(); + if (!shader) + { + if (stage == stage_t::ESS_RAYGEN) return false; + CRTP::template nullOptional(); + continue; + } + if (!descend(shader,{shader}, specInfo, stage, specInfo_i)) return false; + } + } + return true; + } inline bool impl(const instance_t& instance, const CAssetConverter::patch_t& userPatch) { const auto* asset = instance.asset; @@ -1370,6 +1395,25 @@ bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) +{ + const auto* asset = lookup.asset; + // + hasher << asset->getMissGroupCount(); + hasher << asset->getHitGroupCount(); + hasher << asset->getCallableGroupCount(); + AssetVisitor> visitor = { + *this, + {asset,static_cast(patchOverride)->uniqueCopyGroupID}, + *lookup.patch + }; + if (!visitor()) + return false; + const auto& params = asset->getCachedCreationParams(); + hasher << params.maxRecursionDepth; + hasher << params.dynamicStackSize; + return true; +} bool CAssetConverter::CHashCache::hash_impl::operator()(lookup_t lookup) { const auto* asset = lookup.asset; @@ -1688,6 +1732,7 @@ void CAssetConverter::CHashCache::eraseStale(const IPatchOverride* patchOverride rehash.operator()(); rehash.operator()(); rehash.operator()(); + rehash.template operator()(); // graphics pipeline needs a renderpass rehash.template operator()(); rehash.template operator()(); @@ -2041,103 +2086,184 @@ class GetDependantVisit : public GetDependantVisitBase) + if constexpr (std::is_same_v) { outInfo.info.image.imageLayout = std::get<0>(argTuple); - if (type==IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) + if (type == IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER) { assert(lastCombinedSampler); outInfo.info.combinedImageSampler.sampler = smart_refctd_ptr(lastCombinedSampler); - lastCombinedSampler = nullptr; // for debuggability + 
lastCombinedSampler = nullptr; } } outInfo.desc = std::move(depObj); return true; - } -}; -template<> -class GetDependantVisit : public GetDependantVisitBase + } + }; + template<> + class GetDependantVisit : public GetDependantVisitBase + { + public: + bool finalize() + { + if (!creationParams.indexing) + return false; + creationParams.jointWeightViews = jointWeightViews; + creationParams.auxAttributeViews = auxAttributeViews; + return true; + } + + IGPUPolygonGeometry::SCreationParams creationParams = {}; + // has to be public because of aggregate init, but its only for internal usage! + core::vector jointWeightViews = {}; + core::vector auxAttributeViews = {}; + + protected: + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, + const EPolygonGeometryViewType type, const uint32_t index + ) + { + auto depObj = getDependant(dep,soloPatch); + if (!depObj) + return false; + const auto* asset = user.asset; + switch (type) + { + case EPolygonGeometryViewType::Position: + // obligatory attribute, handle basic setup here too + creationParams.indexing = asset->getIndexingCallback(); + creationParams.aabb = asset->getAABBStorage(); + creationParams.jointCount = asset->getJointCount(); + creationParams.positionView = getView(asset->getPositionView(),std::move(depObj)); + break; + case EPolygonGeometryViewType::Index: + creationParams.indexView = getView(asset->getIndexView(),std::move(depObj)); + break; + case EPolygonGeometryViewType::Normal: + creationParams.normalView = getView(asset->getNormalView(),std::move(depObj)); + break; + case EPolygonGeometryViewType::JointOBB: + creationParams.jointOBBView = getView(*asset->getJointOBBView(),std::move(depObj)); + break; + case EPolygonGeometryViewType::JointIndices: + jointWeightViews.resize(index+1); + jointWeightViews[index].indices = getView(asset->getJointWeightViews()[index].indices,std::move(depObj)); + break; + case 
EPolygonGeometryViewType::JointWeights: + jointWeightViews.resize(index+1); + jointWeightViews[index].weights = getView(asset->getJointWeightViews()[index].weights,std::move(depObj)); + break; + case EPolygonGeometryViewType::Aux: + auxAttributeViews.push_back(getView(asset->getAuxAttributeViews()[index],std::move(depObj))); + break; + default: + return false; + } + // abuse this pointer to signal invalid state + return creationParams.indexing; + } + + private: + IGPUPolygonGeometry::SDataView getView(const ICPUPolygonGeometry::SDataView& orig, core::smart_refctd_ptr&& buff) + { + IGPUPolygonGeometry::SDataView retval = { + .composed = orig.composed, + .src = { + .offset = orig.src.offset, + .size = orig.src.actualSize(), + .buffer = std::move(buff) + } + }; + if (orig && !retval) + creationParams.indexing = nullptr; + return retval; + } + }; + template<> +class GetDependantVisit : public GetDependantVisitBase { - public: - bool finalize() - { - if (!creationParams.indexing) - return false; - creationParams.jointWeightViews = jointWeightViews; - creationParams.auxAttributeViews = auxAttributeViews; - return true; - } +public: - IGPUPolygonGeometry::SCreationParams creationParams = {}; - // has to be public because of aggregate init, but its only for internal usage! 
- core::vector jointWeightViews = {}; - core::vector auxAttributeViews = {}; + inline void allocateShaders(size_t missCount, size_t hitGroupCount, size_t callableGroupCount) + { + misses.resize(missCount); + hitGroups.anyHits.resize(hitGroupCount); + hitGroups.closestHits.resize(hitGroupCount); + hitGroups.intersections.resize(hitGroupCount); + callables.resize(callableGroupCount); + } - protected: - bool descend_impl( - const instance_t& user, const CAssetConverter::patch_t& userPatch, - const instance_t& dep, const CAssetConverter::patch_t& soloPatch, - const EPolygonGeometryViewType type, const uint32_t index - ) - { - auto depObj = getDependant(dep,soloPatch); - if (!depObj) - return false; - const auto* asset = user.asset; - switch (type) - { - case EPolygonGeometryViewType::Position: - // obligatory attribute, handle basic setup here too - creationParams.indexing = asset->getIndexingCallback(); - creationParams.aabb = asset->getAABBStorage(); - creationParams.jointCount = asset->getJointCount(); - creationParams.positionView = getView(asset->getPositionView(),std::move(depObj)); - break; - case EPolygonGeometryViewType::Index: - creationParams.indexView = getView(asset->getIndexView(),std::move(depObj)); - break; - case EPolygonGeometryViewType::Normal: - creationParams.normalView = getView(asset->getNormalView(),std::move(depObj)); - break; - case EPolygonGeometryViewType::JointOBB: - creationParams.jointOBBView = getView(*asset->getJointOBBView(),std::move(depObj)); - break; - case EPolygonGeometryViewType::JointIndices: - jointWeightViews.resize(index+1); - jointWeightViews[index].indices = getView(asset->getJointWeightViews()[index].indices,std::move(depObj)); - break; - case EPolygonGeometryViewType::JointWeights: - jointWeightViews.resize(index+1); - jointWeightViews[index].weights = getView(asset->getJointWeightViews()[index].weights,std::move(depObj)); - break; - case EPolygonGeometryViewType::Aux: - 
auxAttributeViews.push_back(getView(asset->getAuxAttributeViews()[index],std::move(depObj))); - break; - default: - return false; - } - // abuse this pointer to signal invalid state - return creationParams.indexing; - } + inline core::vector* getSpecInfoVector(const hlsl::ShaderStage stage) + { + switch (stage) + { + // raygen is not stored as vector so we can't return it here. Use getSpecInfo + case hlsl::ShaderStage::ESS_MISS: + return &misses; + case hlsl::ShaderStage::ESS_ANY_HIT: + return &hitGroups.anyHits; + case hlsl::ShaderStage::ESS_CLOSEST_HIT: + return &hitGroups.closestHits; + case hlsl::ShaderStage::ESS_INTERSECTION: + return &hitGroups.intersections; + case hlsl::ShaderStage::ESS_CALLABLE: + return &callables; + } + return nullptr; + } + + // ok to do non owning since some cache owns anyway + IGPUPipelineLayout* layout = nullptr; + ICPUPipelineBase::SShaderSpecInfo raygen; + core::vector misses; + ICPURayTracingPipeline::SHitGroupSpecInfos hitGroups; + core::vector callables; + +protected: + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch + ) + { + auto depObj = getDependant(dep, soloPatch); + if (!depObj) + return false; + layout = depObj.get(); + return true; + } + bool descend_impl( + const instance_t& user, const CAssetConverter::patch_t& userPatch, + const instance_t& dep, const CAssetConverter::patch_t& soloPatch, const ICPUPipelineBase::SShaderSpecInfo& inSpecInfo, hlsl::ShaderStage stage, uint32_t groupIndex + ) + { + auto depObj = getDependant(dep, soloPatch); - private: - IGPUPolygonGeometry::SDataView getView(const ICPUPolygonGeometry::SDataView& orig, core::smart_refctd_ptr&& buff) + if (stage == hlsl::ShaderStage::ESS_RAYGEN) { - IGPUPolygonGeometry::SDataView retval = { - .composed = orig.composed, - .src = { - .offset = orig.src.offset, - .size = orig.src.actualSize(), - .buffer = std::move(buff) - } + assert(groupIndex == 0); + 
raygen = ICPUPipelineBase::SShaderSpecInfo{ + .shader = depObj, + .entryPoint = inSpecInfo.entryPoint, + .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, + .entries = inSpecInfo.entries, }; - if (orig && !retval) - creationParams.indexing = nullptr; - return retval; + } else + { + auto& shaderGroups = *getSpecInfoVector(stage); + assert(groupIndex < shaderGroups.size()); + shaderGroups[groupIndex] = ICPUPipelineBase::SShaderSpecInfo{ + .shader = depObj, + .entryPoint = inSpecInfo.entryPoint, + .requiredSubgroupSize = inSpecInfo.requiredSubgroupSize, + .entries = inSpecInfo.entries, + }; } + return true; + } }; - // Needed both for reservation and conversion class MetaDeviceMemoryAllocator final { @@ -2774,6 +2900,9 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult case ICPUGraphicsPipeline::AssetType: visit.template operator()(entry); break; + case ICPURayTracingPipeline::AssetType: + visit.template operator()(entry); + break; case ICPUDescriptorSet::AssetType: visit.template operator()(entry); break; @@ -3474,6 +3603,81 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } } } + if constexpr (std::is_same_v) + { + for (auto& entry : conversionRequests.contentHashToCanonical) + { + const ICPURayTracingPipeline* asset = entry.second.canonicalAsset; + // there is no patching possible for this asset + for (auto i=0ull; i> visitor = { + {visitBase}, + {asset,uniqueCopyGroupID}, + {} + }; + visitor.allocateShaders( + asset->getMissGroupCount(), + asset->getHitGroupCount(), + asset->getCallableGroupCount()); + if (!visitor()) + continue; + // ILogicalDevice::createComputePipelines is rather aggressive on the spec constant validation, so we create one pipeline at a time + core::smart_refctd_ptr ppln; + { + // no derivatives, special flags, etc. 
+ IGPURayTracingPipeline::SCreationParams params = {}; + using SShaderEntryMap = IGPUPipelineBase::SShaderEntryMap; + using stage_t = hlsl::ShaderStage; + using GPUShaderSpecInfo = IGPUPipelineBase::SShaderSpecInfo; + + params.layout = visitor.layout; + + SShaderEntryMap raygenEntryMap; + params.shaderGroups.raygen = GPUShaderSpecInfo::create(visitor.raygen, &raygenEntryMap); + + struct GPUSpecEntryVec + { + core::vector entryMaps; + core::vector specs; + + explicit GPUSpecEntryVec(std::span cpuSpecs) + : entryMaps(cpuSpecs.size()), specs(cpuSpecs.size()) + { + for (auto spec_i = 0u; spec_i < cpuSpecs.size(); spec_i++) + specs[spec_i] = GPUShaderSpecInfo::create(cpuSpecs[spec_i], &entryMaps[spec_i]); + } + }; + + GPUSpecEntryVec missSpecEntry(visitor.misses); + params.shaderGroups.misses = missSpecEntry.specs; + + GPUSpecEntryVec callableSpecEntry(visitor.callables); + params.shaderGroups.callables = callableSpecEntry.specs; + + core::vector hitGroups(visitor.hitGroups.closestHits.size()); + core::vector closestHitEntryMaps(visitor.hitGroups.closestHits.size()); + core::vector anyHitEntryMaps(visitor.hitGroups.anyHits.size()); + core::vector intersectionEntryMaps(visitor.hitGroups.intersections.size()); + assert(anyHitEntryMaps.size() == closestHitEntryMaps.size()); + assert(anyHitEntryMaps.size() == intersectionEntryMaps.size()); + for (auto hitGroup_i = 0u ; hitGroup_i < hitGroups.size(); hitGroup_i++) + { + hitGroups[hitGroup_i].closestHit = GPUShaderSpecInfo::create(visitor.hitGroups.closestHits[hitGroup_i], &closestHitEntryMaps[hitGroup_i]); + hitGroups[hitGroup_i].anyHit = GPUShaderSpecInfo::create(visitor.hitGroups.anyHits[hitGroup_i], &anyHitEntryMaps[hitGroup_i]); + hitGroups[hitGroup_i].intersection = GPUShaderSpecInfo::create(visitor.hitGroups.intersections[hitGroup_i], &intersectionEntryMaps[hitGroup_i]); + } + params.shaderGroups.hits = hitGroups; + + params.cached = asset->getCachedCreationParams(); + 
device->createRayTracingPipelines(inputs.pipelineCache, {¶ms, 1}, &ppln); + conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln)); + } + } + } + } if constexpr (std::is_same_v) { // Why we're not grouping multiple descriptor sets into few pools and doing 1 pool per descriptor set. @@ -3675,6 +3879,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); + dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); dedupCreateProp.template operator()(); @@ -3758,6 +3963,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult pruneStaging.template operator()(); pruneStaging.template operator()(); pruneStaging.template operator()(); + pruneStaging.template operator()(); pruneStaging.template operator()(); pruneStaging.template operator()(); pruneStaging.template operator()(); From 6d2df490b48bfdfe92061a9e4bd029439bfea0bd Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 14 Jul 2025 20:30:59 +0700 Subject: [PATCH 14/40] Add groupIndex to shader hash --- src/nbl/video/utilities/CAssetConverter.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 3980a7a3a4..5bf552c639 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1129,6 +1129,15 @@ class HashVisit : public CAssetConverter::CHashCache::hash_impl_base assert(hlsl::bitCount(stage) == 1); hasher << stage; hasher << arg0.requiredSubgroupSize; + if (std::tuple_size(argTuple) >= 3) + { + const auto groupIndex = std::get<2>(argTuple); + hasher << groupIndex; + } else + { + // assume group index to be zero. 
+ hasher << 0; + } if (!arg0.entries.empty()) { for (const auto& specConstant : arg0.entries) @@ -2239,7 +2248,8 @@ class GetDependantVisit : public GetDependantVisitBase(dep, soloPatch); - + if (!depObj) + return false; if (stage == hlsl::ShaderStage::ESS_RAYGEN) { assert(groupIndex == 0); From 9eb02276c4ef3ca1d0c392498153381c0fde51f0 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 15 Jul 2025 10:57:11 +0700 Subject: [PATCH 15/40] Small fix on rt pipeline has computation --- src/nbl/video/utilities/CAssetConverter.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 5bf552c639..29b3d291fd 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -1129,14 +1129,10 @@ class HashVisit : public CAssetConverter::CHashCache::hash_impl_base assert(hlsl::bitCount(stage) == 1); hasher << stage; hasher << arg0.requiredSubgroupSize; - if (std::tuple_size(argTuple) >= 3) + if constexpr (std::is_same_v) { const auto groupIndex = std::get<2>(argTuple); hasher << groupIndex; - } else - { - // assume group index to be zero. 
- hasher << 0; } if (!arg0.entries.empty()) { From 73ac23fce1b5d45f7ea2e12005c0490abea30c3f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 15 Jul 2025 13:22:28 +0700 Subject: [PATCH 16/40] Automatic no null flags insertion for rt pipeline in asset converter --- src/nbl/video/utilities/CAssetConverter.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 29b3d291fd..e04d0153be 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -3677,6 +3677,27 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult } params.shaderGroups.hits = hitGroups; + using RayTracingFlags = IGPURayTracingPipeline::SCreationParams::FLAGS; + const auto isNullSpecInfo = [](const ICPUPipelineBase::SShaderSpecInfo& specInfo) + { + return specInfo.shader.get() == nullptr; + }; + const auto noNullMiss = std::none_of( + visitor.misses.begin(), + visitor.misses.end(), + isNullSpecInfo); + if (noNullMiss) params.flags |= RayTracingFlags::NO_NULL_MISS_SHADERS; + const auto noNullClosestHit = std::none_of( + visitor.hitGroups.closestHits.begin(), + visitor.hitGroups.closestHits.end(), + isNullSpecInfo); + if (noNullClosestHit) params.flags |= RayTracingFlags::NO_NULL_CLOSEST_HIT_SHADERS; + const auto noNullAnyHit = std::none_of( + visitor.hitGroups.anyHits.begin(), + visitor.hitGroups.anyHits.end(), + isNullSpecInfo); + if (noNullAnyHit) params.flags |= RayTracingFlags::NO_NULL_ANY_HIT_SHADERS; + params.cached = asset->getCachedCreationParams(); device->createRayTracingPipelines(inputs.pipelineCache, {¶ms, 1}, &ppln); conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln)); From bd3a266956401e7109b43966f0cce8211573cf56 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 19:14:58 +0700 Subject: [PATCH 17/40] Extract some common attribute view creation into its own function 
--- src/nbl/asset/utils/CGeometryCreator.cpp | 726 +++++++++-------------- 1 file changed, 288 insertions(+), 438 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 05a80cf3a7..f2a9515566 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -15,11 +15,142 @@ namespace nbl::asset { + namespace + { + using snorm_normal_t = hlsl::vector; + constexpr int8_t snorm_one = std::numeric_limits::max(); + constexpr int8_t snorm_neg_one = std::numeric_limits::min(); + constexpr auto snorm_positive_x = hlsl::vector(snorm_one, 0, 0, 0); + constexpr auto snorm_negative_x = hlsl::vector(snorm_neg_one, 0, 0, 0); + constexpr auto snorm_positive_y = hlsl::vector(0, snorm_one, 0, 0); + constexpr auto snorm_negative_y = hlsl::vector(0, snorm_neg_one, 0, 0); + constexpr auto snorm_positive_z = hlsl::vector(0, 0, snorm_one, 0); + constexpr auto snorm_negative_z = hlsl::vector(0, 0, snorm_neg_one, 0); + + constexpr auto snorm_all_ones = hlsl::vector(snorm_one, snorm_one, snorm_one, snorm_one); + +} + static uint8_t packSnorm(float val) { return round(hlsl::clamp(val, -1.0f, 1.0f) * 127); } +template + requires(std::is_same_v || std::is_same_v) +static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount) +{ + const auto elementCount = 2; + const auto attrSize = sizeof(ElementT) * elementCount; + auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}}); + hlsl::shapes::AABB<4, ElementT> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(std::numeric_limits::max(), std::numeric_limits::max(), 0, 0); + + auto retval = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = attrSize, + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }; + + if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u8 = aabb; + retval.composed.format = EF_R8G8_UNORM; + 
retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM; + } + else if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u16 = aabb; + retval.composed.format = EF_R16G16_UNORM; + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM; + } + + return retval; +} + +template + requires(std::is_same_v || std::is_same_v) +static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex) +{ + + const auto bytesize = sizeof(IndexT) * indexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + + hlsl::shapes::AABB<4,IndexT> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxIndex; + + auto retval = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = sizeof(IndexT), + }, + .src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)}, + }; + + if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u16 = aabb; + retval.composed.format = EF_R16_UINT; + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16; + } + else if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u32 = aabb; + retval.composed.format = EF_R32_UINT; + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U32; + } + + return retval; +} + +template + requires(ElementCountV > 0 && ElementCountV <= 4) +static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, const hlsl::shapes::AABB<4, hlsl::float32_t>& aabb) +{ + using position_t = hlsl::vector; + constexpr auto AttrSize = sizeof(position_t); + auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE}); + + constexpr auto format = []() + { + if constexpr (ElementCountV == 1) return EF_R32_SFLOAT; + if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT; + if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT; + if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT; + }(); + + return { + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, 
+ .format = format, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)} + }; +} + +static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, const hlsl::shapes::AABB<4, int8_t>& aabb) +{ + constexpr auto AttrSize = sizeof(snorm_normal_t); + auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE}); + return { + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_SNORM, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} + }; +} + core::smart_refctd_ptr CGeometryCreator::createCube(const hlsl::float32_t3 size) const { using namespace hlsl; @@ -27,13 +158,15 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); + constexpr auto CubeUniqueVertices = 24; + // Create indices using index_t = uint16_t; { - constexpr auto IndexCount = 36u; - constexpr auto bytesize = sizeof(index_t) * IndexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - auto u = reinterpret_cast(indices->getPointer()); + constexpr auto IndexCount = 36; + constexpr auto MaxIndex = CubeUniqueVertices - 1; + auto indexView = createIndexView(IndexCount, MaxIndex); + auto u = reinterpret_cast(indexView.src.buffer->getPointer()); for (uint32_t i=0u; i<6u; ++i) { u[i*6+0] = 4*i+0; @@ -43,84 +176,42 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h u[i*6+4] = 4*i+2; u[i*6+5] = 4*i+3; } - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = 23; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} - }); + 
retval->setIndexView(std::move(indexView)); } - constexpr auto CubeUniqueVertices = 24; // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; + // for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats - hlsl::vector* normals; - hlsl::vector* uvs; + snorm_normal_t* normals; + + using UvElementT = uint8_t; + constexpr auto MaxUvVal = std::numeric_limits::max(); + hlsl::vector* uvs; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4,float32_t> aabb; aabb.maxVx = float32_t4(size*0.5f,0.f); - aabb.minVx = -aabb.maxVx; - retval->visitAABB([aabb](auto& ref)->void - { - ref.minVx = hlsl::trunc(aabb.minVx); - ref.maxVx = hlsl::trunc(aabb.maxVx); - } - ); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32=aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)} - }); + aabb.minVx = - aabb.maxVx; + + auto positionView = createPositionView(CubeUniqueVertices, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4,int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.maxVx = snorm_all_ones; aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto normalView = 
createSnormNormalView(CubeUniqueVertices, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } + { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4,uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto uvView = createUvView(CubeUniqueVertices); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -165,30 +256,31 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h // { - const hlsl::vector norm[6] = + const snorm_normal_t norm[6] = { - hlsl::vector(0, 0, 1), - hlsl::vector(127, 0, 0), - hlsl::vector(0, 0,-127), - hlsl::vector(-127, 0, 0), - hlsl::vector(0, 127, 0), - hlsl::vector(0,-127, 0) + snorm_positive_z, + snorm_positive_x, + snorm_negative_z, + snorm_negative_x, + snorm_positive_y, + snorm_negative_y }; - const hlsl::vector uv[4] = + const hlsl::vector uv[4] = { - hlsl::vector( 0,255), - hlsl::vector(255,255), - hlsl::vector(255, 0), - hlsl::vector( 0, 0) + hlsl::vector( 0, MaxUvVal), + hlsl::vector(MaxUvVal, MaxUvVal), + hlsl::vector(MaxUvVal, 0), + hlsl::vector( 0, 0) }; - for (size_t f=0ull; f<6ull; ++f) + + for (size_t f = 0ull; f < 6ull; ++f) { - const size_t v = f*4ull; + const size_t v = f * 4ull; - for (size_t i=0ull; i<4ull; ++i) + for (size_t i = 0ull; i < 4ull; ++i) { - normals[v+i] = vector(norm[f],0); - uvs[v+i] = uv[i]; + normals[v + i] = snorm_normal_t(norm[f]); + uvs[v + i] = uv[i]; } } } @@ -218,10 +310,11 @@ 
core::smart_refctd_ptr CGeometryCreator::createSphere(float // Create indices { using index_t = uint32_t; + const auto indexCount = (polyCountX * polyCountY) * 6; - const auto bytesize = sizeof(index_t) * indexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - auto indexPtr = reinterpret_cast(indices->getPointer()); + auto indexView = createIndexView(indexCount, vertexCount - 1); + auto indexPtr = reinterpret_cast(indexView.src.buffer->getPointer()); + uint32_t level = 0; size_t indexAddIx = 0; for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1) @@ -280,18 +373,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float indexPtr[indexAddIx++] = polyCountSqM1; indexPtr[indexAddIx++] = polyCountSq1; - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = vertexCount - 1; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u32=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} - }); + retval->setIndexView(std::move(indexView)); + } constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; @@ -299,71 +382,32 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; - hlsl::vector* uvs; + + snorm_normal_t* normals; + + using UvElementT = uint16_t; + hlsl::vector* uvs; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4, float32_t> aabb; aabb.maxVx = float32_t4(radius, radius, radius, 0.0f); aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - 
.rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); + auto positionView = createPositionView(vertexCount, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4, int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.maxVx = snorm_all_ones; aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) - } - }); + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint16_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = AttrSize, - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); + auto uvView = createUvView(vertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -468,9 +512,9 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( { constexpr uint32_t RowCount = 2u; const auto IndexCount = 
RowCount * 3 * tesselation; - const auto bytesize = sizeof(index_t) * IndexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - auto u = reinterpret_cast(indices->getPointer()); + auto indexView = createIndexView(IndexCount, vertexCount - 1); + auto u = reinterpret_cast(indexView.src.buffer->getPointer()); + for (uint16_t i = 0u, j = 0u; i < halfIx; ++i) { u[j++] = i; @@ -481,18 +525,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx; } - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = vertexCount - 1; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} - }); + retval->setIndexView(std::move(indexView)); } constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; @@ -500,71 +533,32 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; - hlsl::vector* uvs; + + snorm_normal_t* normals; + + using UvElementT = uint16_t; + hlsl::vector* uvs; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4, float32_t> aabb; aabb.maxVx = float32_t4(radius, radius, length, 0.0f); aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); + auto positionView = createPositionView(vertexCount, aabb); + positions 
= reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4, int8_t> aabb; aabb.maxVx = hlsl::vector(127,127,127,0); aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) - } - }); + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); + auto uvView = createUvView(vertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -612,11 +606,13 @@ core::smart_refctd_ptr CGeometryCreator::createCone( { constexpr uint32_t RowCount = 2u; const auto IndexCount = 3 * tesselation; - const auto bytesize = sizeof(index_t) * IndexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - auto u = reinterpret_cast(indices->getPointer()); + + auto indexView = createIndexView(IndexCount, 
vertexCount - 1); + auto u = reinterpret_cast(indexView.src.buffer->getPointer()); + const uint32_t firstIndexOfBaseVertices = 0; const uint32_t firstIndexOfApexVertices = tesselation; + for (uint32_t i = 0; i < tesselation; i++) { u[i * 3] = firstIndexOfApexVertices + i; @@ -624,18 +620,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; } - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = vertexCount - 1; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} - }); + retval->setIndexView(std::move(indexView)); } constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; @@ -646,46 +631,20 @@ core::smart_refctd_ptr CGeometryCreator::createCone( hlsl::vector* normals; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4, float32_t> aabb; aabb.maxVx = float32_t4(radius, radius, length, 0.0f); aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset=0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); + auto positionView = createPositionView(vertexCount, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({{AttrSize * vertexCount,IBuffer::EUF_NONE}}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4, 
int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.maxVx = snorm_all_ones; aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = NormalFormat, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff) - } - }); + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } } @@ -779,94 +738,56 @@ core::smart_refctd_ptr CGeometryCreator::createRectangle(co 3---2 */ const index_t indices[] = {0,3,1,1,3,2}; - auto buffer = ICPUBuffer::create({ - {sizeof(indices),IBuffer::EUF_INDEX_BUFFER_BIT}, - const_cast((const void*)indices) // TODO: temporary till two different creation params (adopting needs non const void, copying needs const void only - }); - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = 3; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=buffer->getSize(),.buffer=std::move(buffer)} - }); + auto indexView = createIndexView(std::size(indices), 3); + memcpy(indexView.src.buffer->getPointer(), indices, sizeof(indices)); + retval->setIndexView(std::move(indexView)); } + constexpr auto VertexCount = 4; // Create vertices { { - const hlsl::float32_t2 positions[] = { + const hlsl::float32_t2 positions[VertexCount] = { hlsl::float32_t2(-size.x, size.y), hlsl::float32_t2( size.x, size.y), hlsl::float32_t2( size.x,-size.y), hlsl::float32_t2(-size.x,-size.y) }; - auto buff = ICPUBuffer::create({{sizeof(positions),IBuffer::EUF_NONE},(void*)positions}); shapes::AABB<4,float32_t> aabb; aabb.minVx = float32_t4(-size,0.f,0.f); aabb.maxVx = float32_t4( size,0.f,0.f); - retval->visitAABB([aabb](auto& ref)->void 
- { - ref.minVx = hlsl::trunc(aabb.minVx); - ref.maxVx = hlsl::trunc(aabb.maxVx); - } - ); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32=aabb}, - .stride = sizeof(positions[0]), - .format = EF_R32G32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)} - }); + auto positionView = createPositionView<2>(VertexCount, aabb); + memcpy(positionView.src.buffer->getPointer(), positions, sizeof(positions)); + retval->setPositionView(std::move(positionView)); } { - const hlsl::vector normals[] = { - hlsl::vector(0,0,127,0), - hlsl::vector(0,0,127,0), - hlsl::vector(0,0,127,0), - hlsl::vector(0,0,127,0) + const hlsl::vector normals[VertexCount] = { + snorm_positive_z, + snorm_positive_z, + snorm_positive_z, + snorm_positive_z, }; - auto buff = ICPUBuffer::create({{sizeof(normals),IBuffer::EUF_NONE},(void*)normals}); shapes::AABB<4,int8_t> aabb; - aabb.maxVx = hlsl::vector(0,0,127,0); - aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = sizeof(normals[0]), - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + aabb.maxVx = snorm_positive_z; + aabb.minVx = snorm_normal_t(0, 0, 0, 0); + auto normalView = createSnormNormalView(VertexCount, aabb); + memcpy(normalView.src.buffer->getPointer(), normals, sizeof(normals)); + retval->setNormalView(std::move(normalView)); } { - const hlsl::vector uvs[] = { - hlsl::vector( 0,255), - hlsl::vector(255,255), - hlsl::vector(255, 0), - hlsl::vector( 0, 0) + using UvElementT = uint8_t; + constexpr auto MaxUvVal = std::numeric_limits::max(); + const hlsl::vector uvsData[VertexCount] = { + hlsl::vector( 0, MaxUvVal), + hlsl::vector(MaxUvVal, MaxUvVal), + hlsl::vector(MaxUvVal, 0), + hlsl::vector( 0, 0) }; - auto buff = 
ICPUBuffer::create({{sizeof(uvs),IBuffer::EUF_NONE},(void*)uvs}); - shapes::AABB<4,uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = sizeof(uvs[0]), - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + hlsl::vector* uvs; + auto uvView = createUvView(VertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + memcpy(uvs, uvsData, sizeof(uvsData)); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -889,68 +810,36 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f const size_t vertexCount = 2u + tesselation; float32_t2* positions; + // for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats - hlsl::vector* normals; + snorm_normal_t* normals; // - constexpr uint16_t UnityUV = 0xffffu; - uint16_t2* uvs; + using UvElementT = uint16_t; + constexpr uint16_t UnityUV = std::numeric_limits::max(); + hlsl::vector* uvs; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4,float32_t> aabb; - aabb.maxVx = float32_t4(radius,radius,0.f,0.f); + aabb.maxVx = float32_t4(radius,radius, 0.f, 0.f); aabb.minVx = -aabb.maxVx; - retval->visitAABB([aabb](auto& ref)->void - { - ref.minVx = hlsl::trunc(aabb.minVx); - ref.maxVx = hlsl::trunc(aabb.maxVx); - } - ); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32=aabb}, - .stride = AttrSize, - .format = EF_R32G32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)} - }); + auto positionView = createPositionView<2>(vertexCount, aabb); + positions = 
reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { constexpr auto AttrSize = sizeof(decltype(*normals)); auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4,int8_t> aabb; - aabb.maxVx = hlsl::vector(0,0,127,0); + aabb.maxVx = snorm_positive_z; aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4,uint16_t> aabb; - aabb.minVx = uint16_t4(0,0,0,0); - aabb.maxVx = uint16_t4(UnityUV,UnityUV,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = AttrSize, - .format = EF_R16G16_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U16_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto uvView = createUvView(vertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -1955,50 +1844,19 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl // Create indices { - auto indexBuffer = asset::ICPUBuffer::create({ icosphere.getIndexSize() }); - memcpy(indexBuffer->getPointer(), icosphere.getIndices(), indexBuffer->getSize()); - - shapes::AABB<4,Icosphere::index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = icosphere.getPositionCount() - 1; - - 
static_assert(sizeof(Icosphere::index_t) == 2 || sizeof(Icosphere::index_t) == 4); - const auto isIndex16Bit = sizeof(Icosphere::index_t) == 2; - - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u32=aabb}, - .stride = sizeof(Icosphere::index_t), - .format = isIndex16Bit ? EF_R16_UINT : EF_R32_UINT, - .rangeFormat = isIndex16Bit? IGeometryBase::EAABBFormat::U16 : IGeometryBase::EAABBFormat::U32 - }, - .src = {.offset=0,.size=icosphere.getIndexSize(),.buffer = std::move(indexBuffer)} - }); + auto indexView = createIndexView(icosphere.getIndexCount(), icosphere.getPositionCount() - 1); + memcpy(indexView.src.buffer->getPointer(), icosphere.getIndices(), icosphere.getIndexSize()); + retval->setIndexView(std::move(indexView)); } { { - using position_t = float32_t3; - constexpr auto AttrSize = sizeof(position_t); - auto buff = ICPUBuffer::create({ icosphere.getPositionCount() * AttrSize, IBuffer::EUF_NONE }); - const auto positions = reinterpret_cast(buff->getPointer()); - memcpy(positions, icosphere.getPositions(), icosphere.getPositionSize()); shapes::AABB<4, float32_t> aabb; aabb.maxVx = float32_t4(radius, radius, radius, 0.f); aabb.minVx = -aabb.maxVx; - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32 = aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = { - .offset = 0, - .size = buff->getSize(), - .buffer = std::move(buff), - } - }); + auto positionView = createPositionView(icosphere.getPositionCount(), aabb); + memcpy(positionView.src.buffer->getPointer(), icosphere.getPositions(), icosphere.getPositionSize()); + retval->setPositionView(std::move(positionView)); } { using normal_t = float32_t3; @@ -2020,28 +1878,20 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl }); } { - using uv_t = uint32_t; - constexpr auto AttrSize = sizeof(uv_t); - auto buff = ICPUBuffer::create({AttrSize * icosphere.getTexCoordCount(), IBuffer::EUF_NONE}); - const 
auto uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4, uint16_t> aabb; - aabb.minVx = uint16_t4(0,0,0,0); - aabb.maxVx = uint16_t4(0xFFFF,0xFFFF,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = AttrSize, - .format = EF_R16G16_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U16_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + using UvElementT = uint16_t; + hlsl::vector* uvs; + auto uvView = createUvView(icosphere.getTexCoordCount()); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++) { const auto texCoords = icosphere.getTexCoords(); const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] }; - uvs[uv_i] = packUnorm2x16(f32_uv); + const auto u32_uv = packUnorm2x16(f32_uv); + memcpy(uvs + uv_i, &u32_uv, sizeof(u32_uv)); } + + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } From 489e2f2d73fdad25e82cde1c83abd98e7b1eafce Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 21:14:46 +0700 Subject: [PATCH 18/40] Slight type naming improvement in geometry creator --- src/nbl/asset/utils/CGeometryCreator.cpp | 62 ++++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index f2a9515566..3e59d2f8c6 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -186,9 +186,9 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h // for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats snorm_normal_t* normals; - using UvElementT = uint8_t; - constexpr auto MaxUvVal = std::numeric_limits::max(); - hlsl::vector* uvs; + using uv_element_t = uint8_t; + constexpr auto MaxUvVal = std::numeric_limits::max(); + hlsl::vector* uvs; { { 
shapes::AABB<4,float32_t> aabb; @@ -209,7 +209,7 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h } { - auto uvView = createUvView(CubeUniqueVertices); + auto uvView = createUvView(CubeUniqueVertices); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); retval->getAuxAttributeViews()->push_back(std::move(uvView)); } @@ -265,12 +265,12 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h snorm_positive_y, snorm_negative_y }; - const hlsl::vector uv[4] = + const hlsl::vector uv[4] = { - hlsl::vector( 0, MaxUvVal), - hlsl::vector(MaxUvVal, MaxUvVal), - hlsl::vector(MaxUvVal, 0), - hlsl::vector( 0, 0) + hlsl::vector( 0, MaxUvVal), + hlsl::vector(MaxUvVal, MaxUvVal), + hlsl::vector(MaxUvVal, 0), + hlsl::vector( 0, 0) }; for (size_t f = 0ull; f < 6ull; ++f) @@ -385,8 +385,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float snorm_normal_t* normals; - using UvElementT = uint16_t; - hlsl::vector* uvs; + using uv_element_t = uint16_t; + hlsl::vector* uvs; { { shapes::AABB<4, float32_t> aabb; @@ -405,7 +405,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float retval->setNormalView(std::move(normalView)); } { - auto uvView = createUvView(vertexCount); + auto uvView = createUvView(vertexCount); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); retval->getAuxAttributeViews()->push_back(std::move(uvView)); } @@ -536,8 +536,8 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( snorm_normal_t* normals; - using UvElementT = uint16_t; - hlsl::vector* uvs; + using uv_element_t = uint16_t; + hlsl::vector* uvs; { { shapes::AABB<4, float32_t> aabb; @@ -556,7 +556,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( retval->setNormalView(std::move(normalView)); } { - auto uvView = createUvView(vertexCount); + auto uvView = createUvView(vertexCount); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); retval->getAuxAttributeViews()->push_back(std::move(uvView)); } @@ -775,16 +775,16 @@ core::smart_refctd_ptr 
CGeometryCreator::createRectangle(co retval->setNormalView(std::move(normalView)); } { - using UvElementT = uint8_t; - constexpr auto MaxUvVal = std::numeric_limits::max(); - const hlsl::vector uvsData[VertexCount] = { - hlsl::vector( 0, MaxUvVal), - hlsl::vector(MaxUvVal, MaxUvVal), - hlsl::vector(MaxUvVal, 0), - hlsl::vector( 0, 0) + using uv_element_t = uint8_t; + constexpr auto MaxUvVal = std::numeric_limits::max(); + const hlsl::vector uvsData[VertexCount] = { + hlsl::vector( 0, MaxUvVal), + hlsl::vector(MaxUvVal, MaxUvVal), + hlsl::vector(MaxUvVal, 0), + hlsl::vector( 0, 0) }; - hlsl::vector* uvs; - auto uvView = createUvView(VertexCount); + hlsl::vector* uvs; + auto uvView = createUvView(VertexCount); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); memcpy(uvs, uvsData, sizeof(uvsData)); retval->getAuxAttributeViews()->push_back(std::move(uvView)); @@ -814,9 +814,9 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f // for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats snorm_normal_t* normals; // - using UvElementT = uint16_t; - constexpr uint16_t UnityUV = std::numeric_limits::max(); - hlsl::vector* uvs; + using uv_element_t = uint16_t; + constexpr uint16_t UnityUV = std::numeric_limits::max(); + hlsl::vector* uvs; { { shapes::AABB<4,float32_t> aabb; @@ -837,7 +837,7 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f retval->setNormalView(std::move(normalView)); } { - auto uvView = createUvView(vertexCount); + auto uvView = createUvView(vertexCount); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); retval->getAuxAttributeViews()->push_back(std::move(uvView)); } @@ -1878,9 +1878,9 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl }); } { - using UvElementT = uint16_t; - hlsl::vector* uvs; - auto uvView = createUvView(icosphere.getTexCoordCount()); + using uv_element_t = uint16_t; + hlsl::vector* uvs; + auto uvView = createUvView(icosphere.getTexCoordCount()); uvs = 
reinterpret_cast(uvView.src.buffer->getPointer()); for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++) From de023238681df1b44a3f6754a85d89dada4f5631 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 21:29:23 +0700 Subject: [PATCH 19/40] Fix bug prone constant --- src/nbl/asset/utils/CGeometryCreator.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 3e59d2f8c6..a867bf1277 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -187,7 +187,7 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h snorm_normal_t* normals; using uv_element_t = uint8_t; - constexpr auto MaxUvVal = std::numeric_limits::max(); + constexpr auto UnityUV = std::numeric_limits::max(); hlsl::vector* uvs; { { @@ -267,9 +267,9 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h }; const hlsl::vector uv[4] = { - hlsl::vector( 0, MaxUvVal), - hlsl::vector(MaxUvVal, MaxUvVal), - hlsl::vector(MaxUvVal, 0), + hlsl::vector( 0, UnityUV), + hlsl::vector(UnityUV, UnityUV), + hlsl::vector(UnityUV, 0), hlsl::vector( 0, 0) }; @@ -378,7 +378,6 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float } constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; - constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; @@ -386,6 +385,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float snorm_normal_t* normals; using uv_element_t = uint16_t; + constexpr auto UnityUV = std::numeric_limits::max(); + hlsl::vector* uvs; { { @@ -474,14 +475,14 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // the vertex at the top of the sphere positions[vertex_i] = { 0.f, radius, 0.f }; - uvs[vertex_i] = { 0, 63}; + uvs[vertex_i] = { 0, UnityUV / 2}; const auto 
quantizedTopNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, 1.f, 0.f)); memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); // the vertex at the bottom of the sphere vertex_i++; positions[vertex_i] = { 0.f, -radius, 0.f }; - uvs[vertex_i] = { 63, 127}; + uvs[vertex_i] = { UnityUV / 2, UnityUV}; const auto quantizedBottomNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, -1.f, 0.f)); memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } @@ -529,7 +530,6 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( } constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; - constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; @@ -628,7 +628,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - hlsl::vector* normals; + snorm_normal_t* normals; { { shapes::AABB<4, float32_t> aabb; @@ -860,7 +860,7 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f *(uvs++) = uint16_t2(t*UnityUV+0.5f,0); } } - std::fill_n(normals,vertexCount,hlsl::vector(0,0,127,0)); + std::fill_n(normals,vertexCount, snorm_positive_z); CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; From 1964b274c9d2e14f66394a08d2bf5461c83c5b01 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 21:45:58 +0700 Subject: [PATCH 20/40] Accept tesselation as uint16_t parameter --- include/nbl/asset/utils/CGeometryCreator.h | 8 ++-- src/nbl/asset/utils/CGeometryCreator.cpp | 52 +++------------------- 2 files changed, 10 insertions(+), 50 deletions(-) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index 1852b0f033..54f2664bc7 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ 
b/include/nbl/asset/utils/CGeometryCreator.h @@ -58,8 +58,8 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param colorCone color of the cone \return Generated mesh. */ - core::vector> createArrow(const uint32_t tesselationCylinder = 4, - const uint32_t tesselationCone = 8, const float height = 1.f, + core::vector> createArrow(const uint16_t tesselationCylinder = 4, + const uint16_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, const float widthCone = 0.3f) const; @@ -85,7 +85,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \return Generated mesh. */ core::smart_refctd_ptr createCylinder(float radius, float length, - uint32_t tesselation, + uint16_t tesselation, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; //! Create a cone mesh. @@ -98,7 +98,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param oblique (to be documented) \return Generated mesh. 
*/ - core::smart_refctd_ptr createCone(float radius, float length, uint32_t tesselation, + core::smart_refctd_ptr createCone(float radius, float length, uint16_t tesselation, float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; core::smart_refctd_ptr createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const; diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index a867bf1277..125554c88c 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -493,13 +493,13 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float core::smart_refctd_ptr CGeometryCreator::createCylinder( float radius, float length, - uint32_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const + uint16_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const { using namespace hlsl; CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; - const auto halfIx = static_cast(tesselation); + const auto halfIx = tesselation; const uint32_t u32_vertexCount = 2 * tesselation; if (u32_vertexCount > std::numeric_limits::max()) return nullptr; @@ -585,7 +585,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( } core::smart_refctd_ptr CGeometryCreator::createCone( - float radius, float length, uint32_t tesselation, + float radius, float length, uint16_t tesselation, float oblique, CQuantNormalCache* const quantNormalCacheOverride) const { @@ -690,8 +690,8 @@ core::smart_refctd_ptr CGeometryCreator::createCone( } core::vector> CGeometryCreator::createArrow( - const uint32_t tesselationCylinder, - const uint32_t tesselationCone, + const uint16_t tesselationCylinder, + const uint16_t tesselationCone, const float height, const float cylinderHeight, const float width0, @@ -880,7 +880,7 @@ class Icosphere public: using index_t = unsigned int; - Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32) + Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth) { if (smooth) buildVerticesSmooth(); @@ -909,12 +909,6 @@ class Icosphere const unsigned int* getIndices() const { return indices.data(); } const unsigned int* getLineIndices() const { return lineIndices.data(); } - // for interleaved vertices: V/N/T - unsigned int getInterleavedVertexCount() const { return getPositionCount(); } // # of vertices - unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); } // # of bytes - int getInterleavedStride() const { return interleavedStride; } // should be 32 bytes - const float* getInterleavedVertices() const { return interleavedVertices.data(); } - protected: private: @@ -1092,11 +1086,6 @@ class Icosphere vertices[i] *= scale; 
vertices[i + 1] *= scale; vertices[i + 2] *= scale; - - // for interleaved array - interleavedVertices[j] *= scale; - interleavedVertices[j + 1] *= scale; - interleavedVertices[j + 2] *= scale; } } @@ -1260,9 +1249,6 @@ class Icosphere // subdivide icosahedron subdivideVerticesFlat(); - - // generate interleaved vertex array as well - buildInterleavedVertices(); } /* @@ -1485,8 +1471,6 @@ class Icosphere // subdivide icosahedron subdivideVerticesSmooth(); - // generate interleaved vertex array as well - buildInterleavedVertices(); } /* divide a trinage into 4 sub triangles and repeat N times @@ -1662,27 +1646,6 @@ class Icosphere stride must be 32 bytes */ - void buildInterleavedVertices() - { - core::vector().swap(interleavedVertices); - - std::size_t i, j; - std::size_t count = vertices.size(); - for (i = 0, j = 0; i < count; i += 3, j += 2) - { - interleavedVertices.push_back(vertices[i]); - interleavedVertices.push_back(vertices[i + 1]); - interleavedVertices.push_back(vertices[i + 2]); - - interleavedVertices.push_back(normals[i]); - interleavedVertices.push_back(normals[i + 1]); - interleavedVertices.push_back(normals[i + 2]); - - interleavedVertices.push_back(texCoords[j]); - interleavedVertices.push_back(texCoords[j + 1]); - } - } - void addVertex(float x, float y, float z) { vertices.push_back(x); @@ -1826,9 +1789,6 @@ class Icosphere core::vector lineIndices; std::map, uint32_t> sharedIndices; // indices of shared vertices, key is tex coord (s,t) - // interleaved - core::vector interleavedVertices; - uint32_t interleavedStride; // # of bytes to hop to the next vertex (should be 32 bytes) }; From 215723574a26539e0c234e3583729bd6fed8d012 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 21:49:02 +0700 Subject: [PATCH 21/40] Remove reciprocal_approxim usage --- src/nbl/asset/utils/CGeometryCreator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp 
b/src/nbl/asset/utils/CGeometryCreator.cpp index 125554c88c..e2be673682 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -562,7 +562,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( } } - const float tesselationRec = core::reciprocal_approxim(static_cast(tesselation)); + const float tesselationRec = 1.f / static_cast(tesselation); const float step = 2.f * core::PI() * tesselationRec; for (uint32_t i = 0u; i < tesselation; ++i) { From 7728987aaef5749fda6cc9abb0ee0df3e279725b Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 22:08:09 +0700 Subject: [PATCH 22/40] use hlsl::numbers instead of constant from core --- src/nbl/asset/utils/CGeometryCreator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index e2be673682..4c0b24a34a 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -450,7 +450,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float //if (y==0) //{ if (normal.y != -1.0f && normal.y != 1.0f) - tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); + tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi()); if (normal.z < 0.0f) tu = 1 - tu; //} @@ -458,7 +458,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; positions[vertex_i] = pos; - uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast(ay * core::RECIPROCAL_PI())) }; + uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast(ay * numbers::inv_pi())) }; memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); vertex_i++; @@ -563,7 +563,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( } const float tesselationRec = 1.f / static_cast(tesselation); - const float step = 2.f * core::PI() * 
tesselationRec; + const float step = 2.f * numbers::pi * tesselationRec; for (uint32_t i = 0u; i < tesselation; ++i) { const auto f_i = static_cast(i); From 53f81af9da454d91d7c1973ce6e3afc921d1e08f Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 22:11:42 +0700 Subject: [PATCH 23/40] Reorder normal calculation so no need to normalize position --- src/nbl/asset/utils/CGeometryCreator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 4c0b24a34a..9fe65a5ff2 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -568,8 +568,8 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( { const auto f_i = static_cast(i); hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f); + const auto n = quantNormalCache->quantize(p); p *= radius; - const auto n = quantNormalCache->quantize(hlsl::normalize(p)); positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); From e29bbf9382424219dbfcf145bdb1f5bbd881187d Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 23:47:47 +0700 Subject: [PATCH 24/40] Remove packSnorm --- src/nbl/asset/utils/CGeometryCreator.cpp | 29 ++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 9fe65a5ff2..78560aaaa1 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -29,11 +29,19 @@ namespace nbl::asset constexpr auto snorm_all_ones = hlsl::vector(snorm_one, snorm_one, snorm_one, snorm_one); -} + template + requires(std::is_same_v || std::is_same_v) + constexpr E_FORMAT get_uv_format() + { + if constexpr(std::is_same_v) + { + return EF_R8G8_UNORM; + } else + { + return EF_R16G16_UNORM; + } + } -static uint8_t packSnorm(float val) -{ - return round(hlsl::clamp(val, -1.0f, 1.0f) * 
127); } template @@ -61,13 +69,13 @@ static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount) if constexpr(std::is_same_v) { retval.composed.encodedDataRange.u8 = aabb; - retval.composed.format = EF_R8G8_UNORM; + retval.composed.format = get_uv_format(); retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM; } else if constexpr(std::is_same_v) { retval.composed.encodedDataRange.u16 = aabb; - retval.composed.format = EF_R16G16_UNORM; + retval.composed.format = get_uv_format(); retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM; } @@ -458,7 +466,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; positions[vertex_i] = pos; - uvs[vertex_i] = { packSnorm(tu), packSnorm(static_cast(ay * numbers::inv_pi())) }; + float32_t2 f32_uv = { tu, static_cast(ay * numbers::inv_pi()) }; + encodePixels(), float>(uvs + vertex_i, f32_uv.data.data); memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); vertex_i++; @@ -537,6 +546,7 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( snorm_normal_t* normals; using uv_element_t = uint16_t; + constexpr auto UnityUV = std::numeric_limits::max(); hlsl::vector* uvs; { { @@ -573,11 +583,12 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); - uvs[i] = { packSnorm(f_i * tesselationRec), packSnorm(0.0) }; + float32_t2 f32_uv = { f_i * tesselationRec, 0.f }; + encodePixels(), float>(uvs + i, f32_uv.data.data); positions[i + halfIx] = { p.x, p.y, length }; normals[i + halfIx] = normals[i]; - uvs[i + halfIx] = { packSnorm(1.0f), packSnorm(0.0f) }; + uvs[i + halfIx] = { UnityUV, 0 }; } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); From b957ca78944c10c1f5a49dcaf139afa52d344beb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 23:50:19 +0700 Subject: [PATCH 25/40] Small impovement on Icosphere index_t --- 
src/nbl/asset/utils/CGeometryCreator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 78560aaaa1..3596a9353f 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -889,7 +889,7 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f class Icosphere { public: - using index_t = unsigned int; + using index_t = uint32_t; Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth) { From 75d486d9578731ece3c021c1b9a2b13348bc5250 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 18 Jul 2025 23:52:34 +0700 Subject: [PATCH 26/40] Remove unnecessary method on Icosphere --- src/nbl/asset/utils/CGeometryCreator.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 3596a9353f..48a07f0eda 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -901,9 +901,7 @@ class Icosphere ~Icosphere() {} - unsigned int getPositionCount() const { return (unsigned int)vertices.size() / 3; } - unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; } - unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; } + unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; } unsigned int getIndexCount() const { return (unsigned int)indices.size(); } unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); } unsigned int getTriangleCount() const { return getIndexCount() / 3; } @@ -1815,7 +1813,7 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl // Create indices { - auto indexView = createIndexView(icosphere.getIndexCount(), icosphere.getPositionCount() - 1); + auto indexView = 
createIndexView(icosphere.getIndexCount(), icosphere.getVertexCount() - 1); memcpy(indexView.src.buffer->getPointer(), icosphere.getIndices(), icosphere.getIndexSize()); retval->setIndexView(std::move(indexView)); } @@ -1825,7 +1823,7 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl shapes::AABB<4, float32_t> aabb; aabb.maxVx = float32_t4(radius, radius, radius, 0.f); aabb.minVx = -aabb.maxVx; - auto positionView = createPositionView(icosphere.getPositionCount(), aabb); + auto positionView = createPositionView(icosphere.getVertexCount(), aabb); memcpy(positionView.src.buffer->getPointer(), icosphere.getPositions(), icosphere.getPositionSize()); retval->setPositionView(std::move(positionView)); } @@ -1851,10 +1849,10 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl { using uv_element_t = uint16_t; hlsl::vector* uvs; - auto uvView = createUvView(icosphere.getTexCoordCount()); + auto uvView = createUvView(icosphere.getVertexCount()); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); - for (auto uv_i = 0u; uv_i < icosphere.getTexCoordCount(); uv_i++) + for (auto uv_i = 0u; uv_i < icosphere.getVertexCount(); uv_i++) { const auto texCoords = icosphere.getTexCoords(); const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] }; From e0013cbedccffced44918c34bd15a97f48a02079 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Tue, 22 Jul 2025 06:47:16 +0700 Subject: [PATCH 27/40] Fix normal quantization cache --- include/nbl/asset/utils/CDirQuantCacheBase.h | 109 ++++++++++++------- 1 file changed, 69 insertions(+), 40 deletions(-) diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h index df574d9f3e..302d6ae39b 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -43,13 +43,13 @@ class CDirQuantCacheBase Vector8u3() : x(0u),y(0u),z(0u) {} Vector8u3(const Vector8u3&) = default; - explicit Vector8u3(const 
hlsl::float32_t3& val) + explicit Vector8u3(const hlsl::uint32_t4& val) { operator=(val); } Vector8u3& operator=(const Vector8u3&) = default; - Vector8u3& operator=(const hlsl::float32_t3& val) + Vector8u3& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; @@ -57,9 +57,9 @@ class CDirQuantCacheBase return *this; } - hlsl::float32_t3 getValue() const + hlsl::uint32_t4 getValue() const { - return { x, y, z }; + return { x, y, z, 0 }; } @@ -75,24 +75,24 @@ class CDirQuantCacheBase Vector8u4() : x(0u),y(0u),z(0u),w(0u) {} Vector8u4(const Vector8u4&) = default; - explicit Vector8u4(const hlsl::float32_t3& val) + explicit Vector8u4(const hlsl::uint32_t4& val) { operator=(val); } Vector8u4& operator=(const Vector8u4&) = default; - Vector8u4& operator=(const hlsl::float32_t3& val) + Vector8u4& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; z = val.z; - w = 0; + w = val.w; return *this; } - hlsl::float32_t3 getValue() const + hlsl::uint32_t4 getValue() const { - return { x, y, z }; + return { x, y, z, w }; } private: @@ -109,17 +109,16 @@ class CDirQuantCacheBase Vector1010102() : storage(0u) {} Vector1010102(const Vector1010102&) = default; - explicit Vector1010102(const hlsl::float32_t3& val) + explicit Vector1010102(const hlsl::uint32_t4& val) { operator=(val); } Vector1010102& operator=(const Vector1010102&) = default; - Vector1010102& operator=(const hlsl::float32_t3& val) + Vector1010102& operator=(const hlsl::uint32_t4& val) { constexpr auto storageBits = quantizationBits + 1u; - hlsl::uint32_t3 u32_val = { val.x, val.y, val.z }; - storage = u32_val.x | (u32_val.y << storageBits) | (u32_val.z << (storageBits * 2u)); + storage = val.x | (val.y << storageBits) | (val.z << (storageBits * 2u)); return *this; } @@ -132,11 +131,11 @@ class CDirQuantCacheBase return storage==other.storage; } - hlsl::float32_t3 getValue() const + hlsl::uint32_t4 getValue() const { constexpr auto storageBits = quantizationBits + 1u; const auto mask = (0x1u << 
storageBits) - 1u; - return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask}; + return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0}; } private: @@ -151,13 +150,13 @@ class CDirQuantCacheBase Vector16u3() : x(0u),y(0u),z(0u) {} Vector16u3(const Vector16u3&) = default; - explicit Vector16u3(const hlsl::float32_t3& val) + explicit Vector16u3(const hlsl::uint32_t4& val) { operator=(val); } Vector16u3& operator=(const Vector16u3&) = default; - Vector16u3& operator=(const hlsl::float32_t3& val) + Vector16u3& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; @@ -165,9 +164,9 @@ class CDirQuantCacheBase return *this; } - hlsl::float32_t3 getValue() const + hlsl::uint32_t4 getValue() const { - return { x, y, z }; + return { x, y, z, 0 }; } private: @@ -182,24 +181,24 @@ class CDirQuantCacheBase Vector16u4() : x(0u),y(0u),z(0u),w(0u) {} Vector16u4(const Vector16u4&) = default; - explicit Vector16u4(const hlsl::float32_t3& val) + explicit Vector16u4(const hlsl::uint32_t4& val) { operator=(val); } Vector16u4& operator=(const Vector16u4&) = default; - Vector16u4& operator=(const hlsl::float32_t3& val) + Vector16u4& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; z = val.z; - w = 0; + w = val.w; return *this; } - hlsl::float32_t3 getValue() const + hlsl::float32_t4 getValue() const { - return { x, y, z }; + return { x, y, z, w }; } private: @@ -379,11 +378,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: std::tuple...> cache; template - value_type_t quantize(const hlsl::float32_t3& value) + value_type_t quantize(const hlsl::vector& value) { - const auto negativeMask = lessThan(value, hlsl::float32_t3(0.0f)); - - const hlsl::float32_t3 absValue = abs(value); + auto to_float32_t4 = [](hlsl::vector src) -> hlsl::float32_t4 + { + if constexpr(dimensions == 1) + { + return {src.x, 0, 0, 0}; + } else if constexpr (dimensions == 2) + { 
+ return {src.x, src.y, 0, 0}; + } else if constexpr (dimensions == 3) + { + return {src.x, src.y, src.z, 0}; + } else if constexpr (dimensions == 4) + { + return {src.x, src.y, src.z, src.w}; + } + }; + + const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector(0.0f))); + + const hlsl::vector absValue = abs(value); const auto key = Key(absValue); constexpr auto quantizationBits = quantization_bits_v; @@ -397,18 +413,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: { const auto fit = findBestFit(absValue); - quantized = abs(fit); + const auto abs_fit = to_float32_t4(abs(fit)); + quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w); + insertIntoCache(key,quantized); } } - //return quantized. - const auto negativeMulVec = hlsl::float32_t3(negativeMask.x ? -1 : 1, negativeMask.y ? -1 : 1, negativeMask.z ? -1 : 1); - return value_type_t(negativeMulVec * quantized.getValue()); + auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask) + { + hlsl::uint32_t4 retval; + retval.x = mask.x ? val2.x : val1.x; + retval.y = mask.y ? val2.y : val1.y; + retval.z = mask.z ? val2.z : val1.z; + retval.w = mask.w ? 
val2.w : val1.w; + return retval; + }; + + const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u); + auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask); + restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask); + return value_type_t(restoredAsVec & xorflag); } template - static inline hlsl::float32_t3 findBestFit(const hlsl::float32_t3& value) + static inline hlsl::vector findBestFit(const hlsl::vector& value) { static_assert(dimensions>1u,"No point"); static_assert(dimensions<=4u,"High Dimensions are Hard!"); @@ -416,10 +445,10 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: const auto vectorForDots = hlsl::normalize(value); // - hlsl::float32_t3 fittingVector; - hlsl::float32_t3 floorOffset; + hlsl::vector fittingVector; + hlsl::vector floorOffset; constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u; - hlsl::float32_t3 corners[cornerCount] = {}; + hlsl::vector corners[cornerCount] = {}; { uint32_t maxDirCompIndex = 0u; for (auto i=1u; i(0.f); } fittingVector = value / maxDirectionComp; floorOffset[maxDirCompIndex] = 0.499f; @@ -453,9 +482,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: } } - hlsl::float32_t3 bestFit; + hlsl::vector bestFit; float closestTo1 = -1.f; - auto evaluateFit = [&](const hlsl::float32_t3& newFit) -> void + auto evaluateFit = [&](const hlsl::vector& newFit) -> void { auto newFitLen = length(newFit); const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen); @@ -467,7 +496,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: }; constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u; - const hlsl::float32_t3 cubeHalfSizeND = hlsl::float32_t3(cubeHalfSize); + const hlsl::vector cubeHalfSizeND = hlsl::vector(cubeHalfSize); for (uint32_t n=cubeHalfSize; n>0u; n--) { //we'd use float addition in the interest 
of speed, to increment the loop From 4afd07208d8c9eaf877d8367f2ab768d6468f049 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 23 Jul 2025 20:10:05 +0700 Subject: [PATCH 28/40] implement constexpr findLSB --- .../hlsl/cpp_compat/impl/intrinsics_impl.hlsl | 10 +++++-- .../builtin/hlsl/cpp_compat/intrinsics.hlsl | 2 +- include/nbl/core/util/bitflag.h | 2 +- src/nbl/asset/ECommonEnums.cpp | 29 +++++-------------- 4 files changed, 18 insertions(+), 25 deletions(-) diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index f92dddfb26..8bfed025ce 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -348,8 +348,14 @@ requires concepts::IntegralScalar struct find_lsb_helper { using return_t = int32_t; - static inline T __call(const T arg) + NBL_CONSTEXPR_FUNC static inline T __call(const T arg) { + if constexpr (std::is_constant_evaluated()) + { + for (T ix = T(0); ix < sizeof(size_t) * 8; ix++) + if ((T(1) << ix) & arg) return ix; + return ~T(0); + } return glm::findLSB(arg); } }; @@ -369,7 +375,7 @@ requires std::is_enum_v struct find_lsb_helper { using return_t = int32_t; - static int32_t __call(NBL_CONST_REF_ARG(EnumType) val) + NBL_CONSTEXPR_FUNC static int32_t __call(NBL_CONST_REF_ARG(EnumType) val) { using underlying_t = std::underlying_type_t; return find_lsb_helper::__call(static_cast(val)); diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index a5747a5fb7..7198bae563 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -68,7 +68,7 @@ inline typename matrix_traits::scalar_type determinant(NBL_CONST_REF_ARG } template -inline typename cpp_compat_intrinsics_impl::find_lsb_helper::return_t findLSB(NBL_CONST_REF_ARG(T) val) +NBL_CONSTEXPR_FUNC inline 
typename cpp_compat_intrinsics_impl::find_lsb_helper::return_t findLSB(NBL_CONST_REF_ARG(T) val) { return cpp_compat_intrinsics_impl::find_lsb_helper::__call(val); } diff --git a/include/nbl/core/util/bitflag.h b/include/nbl/core/util/bitflag.h index 1731c0cac3..62bec57d49 100644 --- a/include/nbl/core/util/bitflag.h +++ b/include/nbl/core/util/bitflag.h @@ -60,7 +60,7 @@ namespace nbl::hlsl::cpp_compat_intrinsics_impl struct find_lsb_helper> { using return_t = int32_t; - static return_t __call(NBL_CONST_REF_ARG(core::bitflag) val) + NBL_CONSTEXPR_FUNC static return_t __call(NBL_CONST_REF_ARG(core::bitflag) val) { return find_lsb_helper::__call(val.value); } diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp index 0f23b9b3fc..2366b25f99 100644 --- a/src/nbl/asset/ECommonEnums.cpp +++ b/src/nbl/asset/ECommonEnums.cpp @@ -3,19 +3,6 @@ namespace nbl::asset { -constexpr static int32_t findLSB(size_t val) -{ - if constexpr(std::is_constant_evaluated()) - { - for (size_t ix=0ull; ix allPreviousStages(core::bitflag stages) { struct PerStagePreviousStages @@ -49,7 +36,7 @@ core::bitflag allPreviousStages(core::bitflag(stageFlag)); + const auto bitIx = hlsl::findLSB(static_cast(stageFlag)); data[bitIx] |= previousStageFlags; } @@ -61,7 +48,7 @@ core::bitflag allPreviousStages(core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; while (bool(stages.value)) { - const auto bitIx = findLSB(static_cast(stages.value)); + const auto bitIx = hlsl::findLSB(static_cast(stages.value)); retval |= bitToAccess[bitIx]; stages ^= static_cast(0x1u< allLaterStages(core::bitflag(stageFlag)); + const auto bitIx = hlsl::findLSB(static_cast(stageFlag)); data[bitIx] |= laterStageFlags; } @@ -113,7 +100,7 @@ core::bitflag allLaterStages(core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; while (bool(stages.value)) { - const auto bitIx = findLSB(static_cast(stages.value)); + const auto bitIx = hlsl::findLSB(static_cast(stages.value)); retval |= bitToAccess[bitIx]; stages ^= 
static_cast(0x1u< allAccessesFromStages(core::bitflag(stageFlag)); + const auto bitIx = hlsl::findLSB(static_cast(stageFlag)); data[bitIx] = accessFlags; } @@ -193,7 +180,7 @@ core::bitflag allAccessesFromStages(core::bitflag retval = ACCESS_FLAGS::NONE; while (bool(stages.value)) { - const auto bitIx = findLSB(static_cast(stages.value)); + const auto bitIx = hlsl::findLSB(static_cast(stages.value)); retval |= bitToAccess[bitIx]; stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag(accessFlags)); + const auto bitIx = hlsl::findLSB(static_cast(accessFlags)); data[bitIx] = stageFlags; } @@ -270,7 +257,7 @@ core::bitflag allStagesFromAccesses(core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; while (bool(accesses.value)) { - const auto bitIx = findLSB(static_cast(accesses.value)); + const auto bitIx = hlsl::findLSB(static_cast(accesses.value)); retval |= bitToStage[bitIx]; accesses ^= static_cast(0x1u< Date: Wed, 23 Jul 2025 20:10:29 +0700 Subject: [PATCH 29/40] Remove unused include --- src/nbl/asset/utils/CGeometryCreator.cpp | 108 +++++++---------------- 1 file changed, 32 insertions(+), 76 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index 48a07f0eda..ed1788f543 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -5,13 +5,11 @@ #include "nbl/asset/utils/CGeometryCreator.h" #include "nbl/builtin/hlsl/tgmath.hlsl" +#include "nbl/builtin/hlsl/math/linalg/transform.hlsl" -#include -#include #include #include - namespace nbl::asset { @@ -159,6 +157,12 @@ static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, }; } +static void encodeUv(hlsl::vector* uvDst, hlsl::float32_t2 uvSrc) +{ + uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc); + memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2); +} + core::smart_refctd_ptr CGeometryCreator::createCube(const hlsl::float32_t3 size) const { using namespace hlsl; @@ -458,7 +462,7 @@ 
core::smart_refctd_ptr CGeometryCreator::createSphere(float //if (y==0) //{ if (normal.y != -1.0f && normal.y != 1.0f) - tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi()); + tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi); if (normal.z < 0.0f) tu = 1 - tu; //} @@ -466,8 +470,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; positions[vertex_i] = pos; - float32_t2 f32_uv = { tu, static_cast(ay * numbers::inv_pi()) }; - encodePixels(), float>(uvs + vertex_i, f32_uv.data.data); + encodeUv(uvs + vertex_i, float32_t2(tu, static_cast(ay* numbers::inv_pi))); memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); vertex_i++; @@ -476,7 +479,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // This is the doubled vertex on the initial position positions[vertex_i] = positions[old_vertex_i]; - uvs[vertex_i] = { 127, uvs[old_vertex_i].y }; + uvs[vertex_i] = { UnityUV, uvs[old_vertex_i].y }; normals[vertex_i] = normals[old_vertex_i]; vertex_i++; @@ -583,12 +586,11 @@ core::smart_refctd_ptr CGeometryCreator::createCylinder( positions[i] = { p.x, p.y, p.z }; memcpy(normals + i, &n, sizeof(n)); - float32_t2 f32_uv = { f_i * tesselationRec, 0.f }; - encodePixels(), float>(uvs + i, f32_uv.data.data); + encodeUv(uvs + i, float32_t2(f_i * tesselationRec, 0.f)); positions[i + halfIx] = { p.x, p.y, length }; normals[i + halfIx] = normals[i]; - uvs[i + halfIx] = { UnityUV, 0 }; + uvs[i + halfIx] = { 1.f * tesselationRec, UnityUV }; } CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); @@ -602,9 +604,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( using namespace hlsl; - CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? 
m_params.normalCache.get() : quantNormalCacheOverride; - - const uint32_t u32_vertexCount = 2 * tesselation; + const uint32_t u32_vertexCount = tesselation + 1; if (u32_vertexCount > std::numeric_limits::max()) return nullptr; const auto vertexCount = static_cast(u32_vertexCount); @@ -615,31 +615,25 @@ core::smart_refctd_ptr CGeometryCreator::createCone( // Create indices using index_t = uint16_t; { - constexpr uint32_t RowCount = 2u; const auto IndexCount = 3 * tesselation; auto indexView = createIndexView(IndexCount, vertexCount - 1); auto u = reinterpret_cast(indexView.src.buffer->getPointer()); - const uint32_t firstIndexOfBaseVertices = 0; - const uint32_t firstIndexOfApexVertices = tesselation; + const uint32_t apexVertexIndex = tesselation; for (uint32_t i = 0; i < tesselation; i++) { - u[i * 3] = firstIndexOfApexVertices + i; - u[(i * 3) + 1] = firstIndexOfBaseVertices + i; - u[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + u[i * 3] = apexVertexIndex; + u[(i * 3) + 1] = i; + u[(i * 3) + 2] = i == (tesselation - 1) ? 
0 : i + 1; } retval->setIndexView(std::move(indexView)); } - constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; - constexpr auto NormalFormat = EF_R8G8B8A8_SNORM; - // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; - snorm_normal_t* normals; { { shapes::AABB<4, float32_t> aabb; @@ -649,14 +643,6 @@ core::smart_refctd_ptr CGeometryCreator::createCone( positions = reinterpret_cast(positionView.src.buffer->getPointer()); retval->setPositionView(std::move(positionView)); } - { - shapes::AABB<4, int8_t> aabb; - aabb.maxVx = snorm_all_ones; - aabb.minVx = -aabb.maxVx; - auto normalView = createSnormNormalView(vertexCount, aabb); - normals = reinterpret_cast(normalView.src.buffer->getPointer()); - retval->setNormalView(std::move(normalView)); - } } const float step = (2.f*core::PI()) / tesselation; @@ -669,38 +655,15 @@ core::smart_refctd_ptr CGeometryCreator::createCone( { hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step)); v *= radius; - - positions[i] = { v.x, v.y, v.z }; - positions[apexVertexBase_i + i] = { apexVertexCoords.x, apexVertexCoords.y, apexVertexCoords.z }; - - const auto simdPosition = hlsl::float32_t3(positions[i].x, positions[i].y, positions[i].z); - const hlsl::float32_t3 v0ToApex = apexVertexCoords - simdPosition; - - uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1; - hlsl::float32_t3 u1 = hlsl::float32_t3(positions[nextVertexIndex].x, positions[nextVertexIndex].y, positions[nextVertexIndex].z); - u1 -= simdPosition; - float angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u1))); - u1 = hlsl::normalize(hlsl::cross(v0ToApex, u1)) * angleWeight; - - uint32_t prevVertexIndex = i == 0 ? 
(tesselation - 1) : i - 1; - hlsl::float32_t3 u2 = hlsl::float32_t3(positions[prevVertexIndex].x, positions[prevVertexIndex].y, positions[prevVertexIndex].z); - u2 -= simdPosition; - angleWeight = std::acos(hlsl::dot(hlsl::normalize(apexVertexCoords), hlsl::normalize(u2))); - u2 = hlsl::normalize(hlsl::cross(u2, v0ToApex)) * angleWeight; - - - const auto baseNormal = quantNormalCache->quantize(hlsl::normalize(u1 + u2)); - memcpy(normals + i, &baseNormal, sizeof(baseNormal)); - - const auto apexNormal = quantNormalCache->quantize(hlsl::normalize(u1)); - memcpy(normals + apexVertexBase_i + i, &apexNormal, sizeof(apexNormal)); + positions[i] = v; } + positions[apexVertexBase_i] = apexVertexCoords; CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -core::vector> CGeometryCreator::createArrow( +core::smart_refctd_ptr CGeometryCreator::createArrow( const uint16_t tesselationCylinder, const uint16_t tesselationCone, const float height, @@ -711,25 +674,20 @@ core::vector> CGeometryCreator::crea { assert(height > cylinderHeight); - using position_t = hlsl::float32_t3; - auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); auto cone = createCone(width1, height-cylinderHeight, tesselationCone); - auto conePositions = reinterpret_cast(cone->getPositionView().src.buffer->getPointer()); - - const auto coneVertexCount = cone->getPositionView().getElementCount(); - - for (auto i = 0ull; i < coneVertexCount; ++i) - { - auto& conePosition = conePositions[i]; - core::vector3df_SIMD newPos(conePosition.x, conePosition.y, conePosition.z); - newPos.rotateYZByRAD(-1.5707963268); - - conePosition = {newPos.x, newPos.y, newPos.z}; - } - - return {cylinder, cone}; + auto collection = core::make_smart_refctd_ptr(); + auto* geometries = collection->getGeometries(); + geometries->push_back({ + .geometry = cylinder + }); + const auto coneTransform = hlsl::math::linalg::rotation_mat(-1.5707963268f, hlsl::float32_t3(1.f, 0.f, 0.f)); + 
geometries->push_back({ + .transform = hlsl::float32_t3x4(coneTransform), + .geometry = cone + }); + return collection; } @@ -1855,9 +1813,7 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl for (auto uv_i = 0u; uv_i < icosphere.getVertexCount(); uv_i++) { const auto texCoords = icosphere.getTexCoords(); - const auto f32_uv = float32_t2{ texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1] }; - const auto u32_uv = packUnorm2x16(f32_uv); - memcpy(uvs + uv_i, &u32_uv, sizeof(u32_uv)); + encodeUv(uvs + uv_i, float32_t2(texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1])); } retval->getAuxAttributeViews()->push_back(std::move(uvView)); From f8e837bc885c4ed07bde3bc1836dda0ffb3f0c29 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 25 Jul 2025 12:15:06 +0700 Subject: [PATCH 30/40] Fix createArrow to return ICPUGeometryCollection instead of vector of polygon geometry --- include/nbl/asset/utils/CGeometryCreator.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index 54f2664bc7..52e8fb1495 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ b/include/nbl/asset/utils/CGeometryCreator.h @@ -11,6 +11,8 @@ // legacy, needs to be removed #include "SColor.h" +#include "nbl/asset/ICPUGeometryCollection.h" + namespace nbl::asset { @@ -58,7 +60,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param colorCone color of the cone \return Generated mesh. 
*/ - core::vector> createArrow(const uint16_t tesselationCylinder = 4, + core::smart_refctd_ptr createArrow(const uint16_t tesselationCylinder = 4, const uint16_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, const float widthCone = 0.3f) const; From 6552952083cf380330e0733db5b0f3abfa308bdf Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 25 Jul 2025 12:15:21 +0700 Subject: [PATCH 31/40] Add more ray tracing intersection query --- .../builtin/hlsl/spirv_intrinsics/raytracing.hlsl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl index 3a49450d7c..41f56e225e 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl @@ -46,11 +46,21 @@ bool rayQueryProceedKHR([[vk::ext_reference]] RayQueryKHR query); [[vk::ext_instruction(spv::OpRayQueryGetIntersectionTypeKHR)]] int rayQueryGetIntersectionTypeKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); +[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] +[[vk::ext_extension("SPV_KHR_ray_query")]] +[[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceCustomIndexKHR)]] +int rayQueryGetIntersectionInstanceCustomIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); + [[vk::ext_capability(spv::CapabilityRayQueryKHR)]] [[vk::ext_extension("SPV_KHR_ray_query")]] [[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceIdKHR)]] int rayQueryGetIntersectionInstanceIdKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); +[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] +[[vk::ext_extension("SPV_KHR_ray_query")]] +[[vk::ext_instruction(spv::OpRayQueryGetIntersectionGeometryIndexKHR)]] +int rayQueryGetIntersectionGeometryIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); + 
[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] [[vk::ext_extension("SPV_KHR_ray_query")]] [[vk::ext_instruction(spv::OpRayQueryGetIntersectionPrimitiveIndexKHR)]] @@ -61,6 +71,11 @@ int rayQueryGetIntersectionPrimitiveIndexKHR([[vk::ext_reference]] RayQueryKHR q [[vk::ext_instruction(spv::OpRayQueryGetIntersectionBarycentricsKHR)]] float2 rayQueryGetIntersectionBarycentricsKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); +[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] +[[vk::ext_extension("SPV_KHR_ray_query")]] +[[vk::ext_instruction(spv::OpRayQueryGetIntersectionFrontFaceKHR)]] +float2 rayQueryGetIntersectionFrontFaceKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); + // position fetch for ray tracing uses gl_HitTriangleVertexPositionsEXT -> HitTriangleVertexPositionsKHR decorated OpVariable [[vk::ext_builtin_input(spv::BuiltInHitTriangleVertexPositionsKHR)]] static const float32_t3 HitTriangleVertexPositionsKHR[3]; From e0b30d0d58aa9de31f2a083f75ec5a4aad9c272c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 25 Jul 2025 12:15:39 +0700 Subject: [PATCH 32/40] Add transform.hlsl to cmakelists --- src/nbl/builtin/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index fcbe58eb41..e3a59b2b50 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -290,6 +290,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format/shared_exp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl") #linear algebra LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl") # TODO: rename `equations` to `polynomials` probably LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") From c6dd9ac8d0c74db8f4d1564ea85d71cc17667601 Mon 
Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 25 Jul 2025 13:19:29 +0700 Subject: [PATCH 33/40] Transform data minimum alignment fix --- include/nbl/asset/IAccelerationStructure.h | 2 ++ src/nbl/video/utilities/CAssetConverter.cpp | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index 829d10bcd8..6caa63ddfa 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -23,6 +23,8 @@ namespace nbl::asset class IAccelerationStructure : public virtual core::IReferenceCounted { public: + static constexpr inline size_t TRANSFORM_DATA_MIN_ALIGNMENT = 16; + // build flags, we don't expose flags that don't make sense for certain levels enum class BUILD_FLAGS : uint8_t { diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d004660e42..bd64912bfc 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -3024,7 +3024,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult uint16_t alignment = hlsl::max(0x1u<(alignof(float),alignment); } uint16_t indexSize = 0; @@ -5061,7 +5061,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul uint16_t alignment = hlsl::max(0x1u<(alignof(float),alignment); } uint16_t indexSize = 0u; @@ -5265,7 +5265,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul } if (geom.hasTransform()) { - offset = core::alignUp(offset,alignof(float)); + offset = core::alignUp(offset, IAccelerationStructure::TRANSFORM_DATA_MIN_ALIGNMENT); outGeom.transform = {.offset=offset,.buffer=smart_refctd_ptr(scratchBuffer)}; memcpyCallback.data = &geom.transform; if (!streamDataToScratch(offset,sizeof(geom.transform),memcpyCallback)) From 86dae5588894d664a494bbc12c3635783720b29e Mon Sep 17 00:00:00 2001 From: kevyuu Date: Fri, 25 Jul 2025 17:33:54 +0700 
Subject: [PATCH 34/40] Fix after merge with master --- src/nbl/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index 15c8237585..c567a5b4b7 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -151,6 +151,7 @@ set(NBL_UI_SOURCES ) set(NBL_ASSET_SOURCES # Assets + asset/ECommonEnums.cpp asset/IAsset.cpp asset/IRenderpass.cpp asset/IAssetManager.cpp From c7caf761e3387dfd019c32bd0bc467b6701508ee Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 26 Jul 2025 17:26:01 +0700 Subject: [PATCH 35/40] Fix uninitialized bug in quantization cache --- include/nbl/asset/utils/CDirQuantCacheBase.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h index 302d6ae39b..d8f6f7b28c 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -496,12 +496,13 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: }; constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u; + const auto test = core::vectorSIMDf(cubeHalfSize); const hlsl::vector cubeHalfSizeND = hlsl::vector(cubeHalfSize); for (uint32_t n=cubeHalfSize; n>0u; n--) { //we'd use float addition in the interest of speed, to increment the loop //but adding a small number to a large one loses precision, so multiplication preferrable - const auto bottomFit = floor(fittingVector * float(n) + floorOffset); + const auto bottomFit = glm::floor(fittingVector * float(n) + floorOffset); if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND))) evaluateFit(bottomFit); for (auto i = 0u; i < cornerCount; i++) From ae5a7553c71872283968891183782e77ff0cb14c Mon Sep 17 00:00:00 2001 From: kevyuu Date: Sat, 26 Jul 2025 17:26:13 +0700 Subject: [PATCH 36/40] Add some comment in quantization cache --- include/nbl/asset/utils/CDirQuantCacheBase.h | 11 ++++++++--- 1 file 
changed, 8 insertions(+), 3 deletions(-) diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h index d8f6f7b28c..d8e01ed02f 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -429,11 +429,16 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: retval.w = mask.w ? val2.w : val1.w; return retval; }; - +; + // create all one bits const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u); + + // for positive number xoring with 0 keep its value + // for negative number we xor with all one which will flip the bits, then we add one later. Flipping the bits then adding one will turn positive number into negative number auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask); restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask); - return value_type_t(restoredAsVec & xorflag); + + return value_type_t(restoredAsVec); } template @@ -446,7 +451,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: // hlsl::vector fittingVector; - hlsl::vector floorOffset; + hlsl::vector floorOffset = {}; constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u; hlsl::vector corners[cornerCount] = {}; { From 02d6c6dc8c9200225f35f8c292fb0edf227503cb Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 28 Jul 2025 13:59:01 +0700 Subject: [PATCH 37/40] Fix indentation in CGeometryCreator.cpp --- src/nbl/asset/utils/CGeometryCreator.cpp | 108 +++++++++++------------ 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp index ed1788f543..d844dca857 100644 --- a/src/nbl/asset/utils/CGeometryCreator.cpp +++ b/src/nbl/asset/utils/CGeometryCreator.cpp @@ -18,44 +18,44 @@ namespace nbl::asset using snorm_normal_t = hlsl::vector; constexpr 
int8_t snorm_one = std::numeric_limits::max(); constexpr int8_t snorm_neg_one = std::numeric_limits::min(); - constexpr auto snorm_positive_x = hlsl::vector(snorm_one, 0, 0, 0); - constexpr auto snorm_negative_x = hlsl::vector(snorm_neg_one, 0, 0, 0); + constexpr auto snorm_positive_x = hlsl::vector(snorm_one, 0, 0, 0); + constexpr auto snorm_negative_x = hlsl::vector(snorm_neg_one, 0, 0, 0); constexpr auto snorm_positive_y = hlsl::vector(0, snorm_one, 0, 0); constexpr auto snorm_negative_y = hlsl::vector(0, snorm_neg_one, 0, 0); - constexpr auto snorm_positive_z = hlsl::vector(0, 0, snorm_one, 0); + constexpr auto snorm_positive_z = hlsl::vector(0, 0, snorm_one, 0); constexpr auto snorm_negative_z = hlsl::vector(0, 0, snorm_neg_one, 0); constexpr auto snorm_all_ones = hlsl::vector(snorm_one, snorm_one, snorm_one, snorm_one); - template - requires(std::is_same_v || std::is_same_v) + template + requires(std::is_same_v || std::is_same_v) constexpr E_FORMAT get_uv_format() - { - if constexpr(std::is_same_v) - { + { + if constexpr(std::is_same_v) + { return EF_R8G8_UNORM; - } else - { + } else + { return EF_R16G16_UNORM; - } - } + } + } } template - requires(std::is_same_v || std::is_same_v) + requires(std::is_same_v || std::is_same_v) static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount) { const auto elementCount = 2; const auto attrSize = sizeof(ElementT) * elementCount; - auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}}); - hlsl::shapes::AABB<4, ElementT> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(std::numeric_limits::max(), std::numeric_limits::max(), 0, 0); + auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}}); + hlsl::shapes::AABB<4, ElementT> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(std::numeric_limits::max(), std::numeric_limits::max(), 0, 0); auto retval = ICPUPolygonGeometry::SDataView{ .composed = { - .stride = attrSize, + .stride 
= attrSize, }, .src = { .offset = 0, @@ -81,22 +81,22 @@ static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount) } template - requires(std::is_same_v || std::is_same_v) + requires(std::is_same_v || std::is_same_v) static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex) { - - const auto bytesize = sizeof(IndexT) * indexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + + const auto bytesize = sizeof(IndexT) * indexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - hlsl::shapes::AABB<4,IndexT> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = maxIndex; + hlsl::shapes::AABB<4,IndexT> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxIndex; auto retval = ICPUPolygonGeometry::SDataView{ - .composed = { - .stride = sizeof(IndexT), - }, - .src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)}, + .composed = { + .stride = sizeof(IndexT), + }, + .src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)}, }; if constexpr(std::is_same_v) @@ -116,20 +116,20 @@ static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t } template - requires(ElementCountV > 0 && ElementCountV <= 4) + requires(ElementCountV > 0 && ElementCountV <= 4) static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, const hlsl::shapes::AABB<4, hlsl::float32_t>& aabb) { using position_t = hlsl::vector; constexpr auto AttrSize = sizeof(position_t); - auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE}); + auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE}); constexpr auto format = []() { - if constexpr (ElementCountV == 1) return EF_R32_SFLOAT; - if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT; - if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT; - if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT; - }(); + if constexpr (ElementCountV == 1) return 
EF_R32_SFLOAT; + if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT; + if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT; + if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT; + }(); return { .composed = { @@ -145,22 +145,22 @@ static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, c static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, const hlsl::shapes::AABB<4, int8_t>& aabb) { constexpr auto AttrSize = sizeof(snorm_normal_t); - auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE}); - return { - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }; + auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE}); + return { + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_SNORM, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} + }; } static void encodeUv(hlsl::vector* uvDst, hlsl::float32_t2 uvSrc) { - uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc); - memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2); + uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc); + memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2); } core::smart_refctd_ptr CGeometryCreator::createCube(const hlsl::float32_t3 size) const @@ -221,7 +221,7 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h } { - auto uvView = createUvView(CubeUniqueVertices); + auto uvView = createUvView(CubeUniqueVertices); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); retval->getAuxAttributeViews()->push_back(std::move(uvView)); } @@ -657,7 +657,7 @@ core::smart_refctd_ptr CGeometryCreator::createCone( v *= radius; positions[i] = v; } - positions[apexVertexBase_i] = apexVertexCoords; + 
positions[apexVertexBase_i] = apexVertexCoords; CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; @@ -681,13 +681,13 @@ core::smart_refctd_ptr CGeometryCreator::createArrow( auto* geometries = collection->getGeometries(); geometries->push_back({ .geometry = cylinder - }); + }); const auto coneTransform = hlsl::math::linalg::rotation_mat(-1.5707963268f, hlsl::float32_t3(1.f, 0.f, 0.f)); geometries->push_back({ .transform = hlsl::float32_t3x4(coneTransform), .geometry = cone - }); - return collection; + }); + return collection; } @@ -1806,7 +1806,7 @@ core::smart_refctd_ptr CGeometryCreator::createIcoSphere(fl } { using uv_element_t = uint16_t; - hlsl::vector* uvs; + hlsl::vector* uvs; auto uvView = createUvView(icosphere.getVertexCount()); uvs = reinterpret_cast(uvView.src.buffer->getPointer()); From bb45773371627c13adac4e5fb71599ee030df669 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 28 Jul 2025 14:03:18 +0700 Subject: [PATCH 38/40] Fix indentation of CDirQuantCacheBase.h --- include/nbl/asset/utils/CDirQuantCacheBase.h | 60 ++++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h index d8e01ed02f..5598364ba6 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -57,7 +57,7 @@ class CDirQuantCacheBase return *this; } - hlsl::uint32_t4 getValue() const + hlsl::uint32_t4 getValue() const { return { x, y, z, 0 }; } @@ -90,7 +90,7 @@ class CDirQuantCacheBase return *this; } - hlsl::uint32_t4 getValue() const + hlsl::uint32_t4 getValue() const { return { x, y, z, w }; } @@ -131,7 +131,7 @@ class CDirQuantCacheBase return storage==other.storage; } - hlsl::uint32_t4 getValue() const + hlsl::uint32_t4 getValue() const { constexpr auto storageBits = quantizationBits + 1u; const auto mask = (0x1u << storageBits) - 1u; @@ -164,7 +164,7 @@ class CDirQuantCacheBase 
return *this; } - hlsl::uint32_t4 getValue() const + hlsl::uint32_t4 getValue() const { return { x, y, z, 0 }; } @@ -196,7 +196,7 @@ class CDirQuantCacheBase return *this; } - hlsl::float32_t4 getValue() const + hlsl::float32_t4 getValue() const { return { x, y, z, w }; } @@ -381,21 +381,21 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: value_type_t quantize(const hlsl::vector& value) { auto to_float32_t4 = [](hlsl::vector src) -> hlsl::float32_t4 - { - if constexpr(dimensions == 1) - { - return {src.x, 0, 0, 0}; - } else if constexpr (dimensions == 2) - { - return {src.x, src.y, 0, 0}; - } else if constexpr (dimensions == 3) - { - return {src.x, src.y, src.z, 0}; - } else if constexpr (dimensions == 4) - { - return {src.x, src.y, src.z, src.w}; - } - }; + { + if constexpr(dimensions == 1) + { + return {src.x, 0, 0, 0}; + } else if constexpr (dimensions == 2) + { + return {src.x, src.y, 0, 0}; + } else if constexpr (dimensions == 3) + { + return {src.x, src.y, src.z, 0}; + } else if constexpr (dimensions == 4) + { + return {src.x, src.y, src.z, src.w}; + } + }; const auto negativeMask = to_float32_t4(lessThan(value, hlsl::vector(0.0f))); @@ -414,31 +414,31 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: const auto fit = findBestFit(absValue); const auto abs_fit = to_float32_t4(abs(fit)); - quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w); + quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w); insertIntoCache(key,quantized); } } auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask) - { + { hlsl::uint32_t4 retval; retval.x = mask.x ? val2.x : val1.x; retval.y = mask.y ? val2.y : val1.y; retval.z = mask.z ? val2.z : val1.z; retval.w = mask.w ? 
val2.w : val1.w; return retval; - }; + }; ; - // create all one bits - const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u); + // create all one bits + const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u); - // for positive number xoring with 0 keep its value - // for negative number we xor with all one which will flip the bits, then we add one later. Flipping the bits then adding one will turn positive number into negative number - auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask); - restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask); + // for positive number xoring with 0 keep its value + // for negative number we xor with all one which will flip the bits, then we add one later. Flipping the bits then adding one will turn positive number into negative number + auto restoredAsVec = quantized.getValue() ^ switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask); + restoredAsVec += switch_vec(hlsl::uint32_t4(0u), hlsl::uint32_t4(1u), negativeMask); - return value_type_t(restoredAsVec); + return value_type_t(restoredAsVec); } template From 1bedf2dc2acbbf4e36f39f8b7f9bd85158100998 Mon Sep 17 00:00:00 2001 From: kevyuu Date: Mon, 28 Jul 2025 14:04:31 +0700 Subject: [PATCH 39/40] Fix indentation of CGeometryCreator.h --- include/nbl/asset/utils/CGeometryCreator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h index 52e8fb1495..bd5281cde3 100644 --- a/include/nbl/asset/utils/CGeometryCreator.h +++ b/include/nbl/asset/utils/CGeometryCreator.h @@ -60,7 +60,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param colorCone color of the cone \return Generated mesh. 
*/ - core::smart_refctd_ptr createArrow(const uint16_t tesselationCylinder = 4, + core::smart_refctd_ptr createArrow(const uint16_t tesselationCylinder = 4, const uint16_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, const float widthCone = 0.3f) const; From 5de6b84b35226333db92b6da17add9d045d847ca Mon Sep 17 00:00:00 2001 From: kevyuu Date: Wed, 30 Jul 2025 18:06:07 +0700 Subject: [PATCH 40/40] Move creationFlags to cached params --- include/nbl/asset/IRayTracingPipeline.h | 26 ++++++++++----------- include/nbl/video/IGPUComputePipeline.h | 4 ++++ include/nbl/video/IGPUGraphicsPipeline.h | 4 ++++ include/nbl/video/IGPURayTracingPipeline.h | 18 +++++++------- include/nbl/video/ILogicalDevice.h | 2 +- src/nbl/video/CVulkanLogicalDevice.cpp | 2 +- src/nbl/video/ILogicalDevice.cpp | 4 ++-- src/nbl/video/utilities/CAssetConverter.cpp | 8 +++---- 8 files changed, 38 insertions(+), 30 deletions(-) diff --git a/include/nbl/asset/IRayTracingPipeline.h b/include/nbl/asset/IRayTracingPipeline.h index b97d8d7002..c0d8d98ce5 100644 --- a/include/nbl/asset/IRayTracingPipeline.h +++ b/include/nbl/asset/IRayTracingPipeline.h @@ -14,18 +14,6 @@ namespace nbl::asset class IRayTracingPipelineBase : public virtual core::IReferenceCounted { public: - struct SCachedCreationParams final - { - uint32_t maxRecursionDepth : 6 = 0; - uint32_t dynamicStackSize : 1 = false; - }; -}; - -template -class IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase -{ - public: - #define base_flag(F) static_cast(IPipelineBase::FLAGS::F) enum class CreationFlags : uint64_t { @@ -43,7 +31,19 @@ class IRayTracingPipeline : public IPipeline, public IRayTra ALLOW_MOTION = 1<<20, }; #undef base_flag - using FLAGS = CreationFlags; + + struct SCachedCreationParams final + { + core::bitflag flags = CreationFlags::NONE; + uint32_t maxRecursionDepth : 6 = 0; + uint32_t dynamicStackSize : 1 = false; + }; +}; + +template +class 
IRayTracingPipeline : public IPipeline, public IRayTracingPipelineBase +{ + public: inline const SCachedCreationParams& getCachedCreationParams() const { return m_params; } diff --git a/include/nbl/video/IGPUComputePipeline.h b/include/nbl/video/IGPUComputePipeline.h index c7343c131a..9854725cd1 100644 --- a/include/nbl/video/IGPUComputePipeline.h +++ b/include/nbl/video/IGPUComputePipeline.h @@ -74,6 +74,10 @@ class IGPUComputePipeline : public IGPUPipeline& getFlags() { return flags; } + + inline core::bitflag getFlags() const { return flags; } + const IGPUPipelineLayout* layout = nullptr; // TODO: Could guess the required flags from SPIR-V introspection of declared caps core::bitflag flags = FLAGS::NONE; diff --git a/include/nbl/video/IGPUGraphicsPipeline.h b/include/nbl/video/IGPUGraphicsPipeline.h index e5dc7c5d7b..79e1337787 100644 --- a/include/nbl/video/IGPUGraphicsPipeline.h +++ b/include/nbl/video/IGPUGraphicsPipeline.h @@ -87,6 +87,10 @@ class IGPUGraphicsPipeline : public IGPUPipeline& getFlags() { return flags; } + + inline core::bitflag getFlags() const { return flags; } + const IGPUPipelineLayout* layout = nullptr; SShaderSpecInfo vertexShader; SShaderSpecInfo tesselationControlShader; diff --git a/include/nbl/video/IGPURayTracingPipeline.h b/include/nbl/video/IGPURayTracingPipeline.h index ecdc529542..816cc68243 100644 --- a/include/nbl/video/IGPURayTracingPipeline.h +++ b/include/nbl/video/IGPURayTracingPipeline.h @@ -24,7 +24,7 @@ class IGPURayTracingPipeline : public IGPUPipeline { - using FLAGS = pipeline_t::FLAGS; + using FLAGS = IRayTracingPipelineBase::CreationFlags; struct SShaderGroupsParams { @@ -45,8 +45,6 @@ class IGPURayTracingPipeline : public IGPUPipeline flags = FLAGS::NONE; inline SSpecializationValidationResult valid() const { @@ -76,7 +74,7 @@ class IGPURayTracingPipeline : public IGPUPipeline& getFlags() { return cached.flags; } + + inline core::bitflag getFlags() const { return cached.flags; } + }; struct SShaderGroupHandle @@ 
-153,7 +155,7 @@ class IGPURayTracingPipeline : public IGPUPipeline getCreationFlags() const { return m_flags; } + inline core::bitflag getCreationFlags() const { return getCachedCreationParams().flags; } // Vulkan: const VkPipeline* virtual const void* getNativeHandle() const = 0; @@ -170,13 +172,11 @@ class IGPURayTracingPipeline : public IGPUPipeline(params.layout->getOriginDevice()), params.layout, params.cached), - m_flags(params.flags) + IGPURayTracingPipeline(const SCreationParams& params) : IGPUPipeline(core::smart_refctd_ptr(params.layout->getOriginDevice()), params.layout, params.cached) {} virtual ~IGPURayTracingPipeline() = default; - const core::bitflag m_flags; }; } diff --git a/include/nbl/video/ILogicalDevice.h b/include/nbl/video/ILogicalDevice.h index 6298afeb27..180342e2d4 100644 --- a/include/nbl/video/ILogicalDevice.h +++ b/include/nbl/video/ILogicalDevice.h @@ -1258,7 +1258,7 @@ class NBL_API2 ILogicalDevice : public core::IReferenceCounted, public IDeviceMe } } // https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkComputePipelineCreateInfo.html#VUID-VkComputePipelineCreateInfo-flags-07985 - else if (ci.basePipelineIndex < -1 || ci.basePipelineIndex >= i || ci.basePipelineIndex >= 0 && !params[ci.basePipelineIndex].flags.hasFlags(AllowDerivativesFlag)) + else if (ci.basePipelineIndex < -1 || ci.basePipelineIndex >= i || ci.basePipelineIndex >= 0 && !params[ci.basePipelineIndex].getFlags().hasFlags(AllowDerivativesFlag)) { NBL_LOG_ERROR("Invalid basePipeline was specified (params[%d])", i); return {}; diff --git a/src/nbl/video/CVulkanLogicalDevice.cpp b/src/nbl/video/CVulkanLogicalDevice.cpp index 86eaa4fd51..e1c5d89da8 100644 --- a/src/nbl/video/CVulkanLogicalDevice.cpp +++ b/src/nbl/video/CVulkanLogicalDevice.cpp @@ -1132,7 +1132,7 @@ template void initPipelineCreateInfo(VkPipelineCreateInfo_t* vk_info, const SCreationParams& info) { // the new flags type (64bit) is only available with maintenance5 - vk_info->flags = 
static_cast(info.flags.value); + vk_info->flags = static_cast(info.getFlags().value); vk_info->layout = static_cast(info.layout)->getInternalObject(); if (info.isDerivative()) { diff --git a/src/nbl/video/ILogicalDevice.cpp b/src/nbl/video/ILogicalDevice.cpp index a21e00e303..c1b4174541 100644 --- a/src/nbl/video/ILogicalDevice.cpp +++ b/src/nbl/video/ILogicalDevice.cpp @@ -1025,8 +1025,8 @@ bool ILogicalDevice::createRayTracingPipelines(IGPUPipelineCache* const pipeline for (const auto& param : params) { - const bool skipAABBs = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS); - const bool skipBuiltin = bool(param.flags & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES); + const bool skipAABBs = bool(param.getFlags() & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_AABBS); + const bool skipBuiltin = bool(param.getFlags() & IGPURayTracingPipeline::SCreationParams::FLAGS::SKIP_BUILT_IN_PRIMITIVES); if (!features.rayTracingPipeline) { diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index 935c8bf4aa..b0eb0a23d5 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -3676,6 +3676,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult hitGroups[hitGroup_i].intersection = GPUShaderSpecInfo::create(visitor.hitGroups.intersections[hitGroup_i], &intersectionEntryMaps[hitGroup_i]); } params.shaderGroups.hits = hitGroups; + params.cached = asset->getCachedCreationParams(); using RayTracingFlags = IGPURayTracingPipeline::SCreationParams::FLAGS; const auto isNullSpecInfo = [](const ICPUPipelineBase::SShaderSpecInfo& specInfo) @@ -3686,19 +3687,18 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult visitor.misses.begin(), visitor.misses.end(), isNullSpecInfo); - if (noNullMiss) params.flags |= RayTracingFlags::NO_NULL_MISS_SHADERS; + if (noNullMiss) 
params.cached.flags |= RayTracingFlags::NO_NULL_MISS_SHADERS; const auto noNullClosestHit = std::none_of( visitor.hitGroups.closestHits.begin(), visitor.hitGroups.closestHits.end(), isNullSpecInfo); - if (noNullClosestHit) params.flags |= RayTracingFlags::NO_NULL_CLOSEST_HIT_SHADERS; + if (noNullClosestHit) params.cached.flags |= RayTracingFlags::NO_NULL_CLOSEST_HIT_SHADERS; const auto noNullAnyHit = std::none_of( visitor.hitGroups.anyHits.begin(), visitor.hitGroups.anyHits.end(), isNullSpecInfo); - if (noNullAnyHit) params.flags |= RayTracingFlags::NO_NULL_ANY_HIT_SHADERS; + if (noNullAnyHit) params.cached.flags |= RayTracingFlags::NO_NULL_ANY_HIT_SHADERS; - params.cached = asset->getCachedCreationParams(); device->createRayTracingPipelines(inputs.pipelineCache, {&params, 1}, &ppln); conversionRequests.assign(entry.first, entry.second.firstCopyIx, i, std::move(ppln)); }