diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h index c07a0ced6a..f830b270d3 100644 --- a/include/nbl/asset/ECommonEnums.h +++ b/include/nbl/asset/ECommonEnums.h @@ -185,292 +185,13 @@ struct SMemoryBarrier } }; -inline core::bitflag allPreviousStages(core::bitflag stages) -{ - struct PerStagePreviousStages - { - public: - constexpr PerStagePreviousStages() - { - // set all stage to have itself as their previous stages - for (auto i = 0; i < std::numeric_limits::digits; i++) - data[i] = static_cast(i); - - add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - // graphics primitive pipeline - PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT; - for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT}) - { - if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) - primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT; - add(pipelineStage, primitivePrevStage); - primitivePrevStage |= pipelineStage; - } - - - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] |= previousStageFlags; - } - - PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; - }; - - constexpr 
PerStagePreviousStages bitToAccess = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allLaterStages(core::bitflag stages) -{ - struct PerStageLaterStages - { - public: - constexpr PerStageLaterStages() - { - // set all stage to have itself as their next stages - for (auto i = 0; i < std::numeric_limits::digits; i++) - data[i] = static_cast(i); - - add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT); - add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); - - // graphics primitive pipeline - PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE; - const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT }; - for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) - { - const auto pipelineStage = *iter; - add(pipelineStage, laterStage); - laterStage |= pipelineStage; - } - - add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] |= laterStageFlags; - 
} - - PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; - }; - - constexpr PerStageLaterStages bitToAccess = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allAccessesFromStages(core::bitflag stages) -{ - struct PerStageAccesses - { - public: - constexpr PerStageAccesses() - { - init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT); - - constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW); - init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT); - - constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT; -// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly? - - constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW); - init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW); - - init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT); - init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT); - init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT); - - constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS; - constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); - 
init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT); - init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT); - init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW); -// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW); -// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW); - init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT); - init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT); - constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW); - init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT); - init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW); - init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); - - init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); - - init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW); - init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW); - -// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT); -// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT); -// 
init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} +core::bitflag allPreviousStages(core::bitflag stages); - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(stageFlag)); - data[bitIx] = accessFlags; - } +core::bitflag allLaterStages(core::bitflag stages); - ACCESS_FLAGS data[32] = {}; - }; - constexpr PerStageAccesses bitToAccess = {}; +core::bitflag allAccessesFromStages(core::bitflag stages); - // TODO: add logically later or previous stages to make sure all other accesses remain valid - // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically) - - core::bitflag retval = ACCESS_FLAGS::NONE; - while (bool(stages.value)) - { - const auto bitIx = hlsl::findLSB(stages); - retval |= bitToAccess[bitIx]; - stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses) -{ - struct PerAccessStages - { - public: - constexpr PerAccessStages() - { - init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); - init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); - - init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); - init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); - - constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT; -// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds); -// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); - - constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; - 
constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; - init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders); - init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations); - - init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); - init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); - init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT); - init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); - - init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders); - init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); - init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds); - init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders); - - init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT); - init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT); - - init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT); - init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT); - constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT; - init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests); - init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests); - init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT); - init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); - 
init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); - - init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); - -// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); -// init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); -// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); -// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); -// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); -// init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); - } - constexpr const auto& operator[](const size_t ix) const {return data[ix];} - - private: - constexpr static uint8_t findLSB(size_t val) - { - for (size_t ix=0ull; ix(accessFlags)); - data[bitIx] = stageFlags; - } - - PIPELINE_STAGE_FLAGS data[32] = {}; - }; - constexpr PerAccessStages bitToStage = {}; - - core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; - while (bool(accesses.value)) - { - const auto bitIx = hlsl::findLSB(accesses); - retval |= bitToStage[bitIx]; - accesses ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses); } diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h index 829d10bcd8..755c81e477 100644 --- a/include/nbl/asset/IAccelerationStructure.h +++ b/include/nbl/asset/IAccelerationStructure.h @@ -23,6 +23,8 @@ namespace nbl::asset class IAccelerationStructure : public virtual core::IReferenceCounted { public: + static constexpr inline size_t TransformDataMinAlignment = 16; + // build flags, we don't expose flags that don't make sense for certain levels enum class BUILD_FLAGS : uint8_t { diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h index 8c3b8f95ef..6f8c1bb35b 100644 --- a/include/nbl/asset/IBuffer.h +++ b/include/nbl/asset/IBuffer.h @@ -100,7 
+100,7 @@ struct SBufferRange inline operator SBufferRange&() {return *reinterpret_cast*>(this);} inline operator const SBufferRange&() const {return *reinterpret_cast*>(this);} - template requires std::is_same_v,BufferType> + template requires (std::is_const_v && std::is_base_of_v>) inline operator SBufferBinding() const { return {.offset=offset,.buffer=buffer}; } explicit inline operator bool() const {return isValid();} diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h index a8ab97aa4d..f21e7f7233 100644 --- a/include/nbl/asset/IPolygonGeometry.h +++ b/include/nbl/asset/IPolygonGeometry.h @@ -203,6 +203,26 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG // For User defined semantics inline const core::vector& getAuxAttributeViews() const {return m_auxAttributeViews;} + inline E_INDEX_TYPE getIndexType() const /* maps the index view's format to an index type; EIT_UNKNOWN when there is no index view or its format is neither R16_UINT nor R32_UINT */ + { + auto indexType = EIT_UNKNOWN; + // disallowed index format + if (base_t::m_indexView) + { + switch (base_t::m_indexView.composed.format) + { + case EF_R16_UINT: + indexType = EIT_16BIT; + break; + case EF_R32_UINT: /* no [[fallthrough]] here: the next statement is not a case label */ + indexType = EIT_32BIT; + break; + default: + break; + } + } + return indexType; + } // Does not set the `transform` or `geometryFlags` fields, because it doesn't care about it. // Also won't set second set of vertex data, opacity mipmaps, etc. 
@@ -212,30 +232,12 @@ class IPolygonGeometry : public IIndexableGeometry, public IPolygonG // must be a triangle list, but don't want to compare pointers if (m_indexing && m_indexing->knownTopology()==EPT_TRIANGLE_LIST)// && m_indexing->degree() == TriangleList()->degree() && m_indexing->rate() == TriangleList->rate()) { - auto indexType = EIT_UNKNOWN; - // disallowed index format - if (base_t::m_indexView) - { - switch (base_t::m_indexView.composed.format) - { - case EF_R16_UINT: - indexType = EIT_16BIT; - break; - case EF_R32_UINT: [[fallthrough]]; - indexType = EIT_32BIT; - break; - default: - break; - } - if (indexType==EIT_UNKNOWN) - return retval; - } retval.vertexData[0] = base_t::m_positionView.src; retval.indexData = base_t::m_indexView.src; retval.maxVertex = base_t::m_positionView.getElementCount() - 1; retval.vertexStride = base_t::m_positionView.composed.getStride(); retval.vertexFormat = base_t::m_positionView.composed.format; - retval.indexType = indexType; + retval.indexType = getIndexType(); } return retval; } diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h index c4e7174013..c02499b849 100644 --- a/include/nbl/asset/utils/CDirQuantCacheBase.h +++ b/include/nbl/asset/utils/CDirQuantCacheBase.h @@ -43,13 +43,13 @@ class CDirQuantCacheBase Vector8u3() : x(0u),y(0u),z(0u) {} Vector8u3(const Vector8u3&) = default; - explicit Vector8u3(const core::vectorSIMDu32& val) + explicit Vector8u3(const hlsl::uint32_t4& val) { operator=(val); } Vector8u3& operator=(const Vector8u3&) = default; - Vector8u3& operator=(const core::vectorSIMDu32& val) + Vector8u3& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; @@ -57,11 +57,12 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::uint32_t4 getValue() const { - return core::vectorSIMDu32(x,y,z); + return { x, y, z, 0 }; } + private: uint8_t x; uint8_t y; @@ -74,13 +75,13 @@ class CDirQuantCacheBase 
Vector8u4() : x(0u),y(0u),z(0u),w(0u) {} Vector8u4(const Vector8u4&) = default; - explicit Vector8u4(const core::vectorSIMDu32& val) + explicit Vector8u4(const hlsl::uint32_t4& val) { operator=(val); } Vector8u4& operator=(const Vector8u4&) = default; - Vector8u4& operator=(const core::vectorSIMDu32& val) + Vector8u4& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; @@ -89,9 +90,9 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::uint32_t4 getValue() const { - return core::vectorSIMDu32(x,y,z,w); + return { x, y, z, w }; } private: @@ -108,16 +109,16 @@ class CDirQuantCacheBase Vector1010102() : storage(0u) {} Vector1010102(const Vector1010102&) = default; - explicit Vector1010102(const core::vectorSIMDu32& val) + explicit Vector1010102(const hlsl::uint32_t4& val) { operator=(val); } Vector1010102& operator=(const Vector1010102&) = default; - Vector1010102& operator=(const core::vectorSIMDu32& val) + Vector1010102& operator=(const hlsl::uint32_t4& val) { - constexpr auto storageBits = quantizationBits+1u; - storage = val.x|(val.y<>storageBits,storage>>(storageBits*2u))&mask; + constexpr auto storageBits = quantizationBits + 1u; + const auto mask = (0x1u << storageBits) - 1u; + return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0}; } - + private: uint32_t storage; }; @@ -149,13 +150,13 @@ class CDirQuantCacheBase Vector16u3() : x(0u),y(0u),z(0u) {} Vector16u3(const Vector16u3&) = default; - explicit Vector16u3(const core::vectorSIMDu32& val) + explicit Vector16u3(const hlsl::uint32_t4& val) { operator=(val); } Vector16u3& operator=(const Vector16u3&) = default; - Vector16u3& operator=(const core::vectorSIMDu32& val) + Vector16u3& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; @@ -163,11 +164,11 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::uint32_t4 getValue() const { - return 
core::vectorSIMDu32(x,y,z); + return { x, y, z, 0 }; } - + private: uint16_t x; uint16_t y; @@ -180,13 +181,13 @@ class CDirQuantCacheBase Vector16u4() : x(0u),y(0u),z(0u),w(0u) {} Vector16u4(const Vector16u4&) = default; - explicit Vector16u4(const core::vectorSIMDu32& val) + explicit Vector16u4(const hlsl::uint32_t4& val) { operator=(val); } Vector16u4& operator=(const Vector16u4&) = default; - Vector16u4& operator=(const core::vectorSIMDu32& val) + Vector16u4& operator=(const hlsl::uint32_t4& val) { x = val.x; y = val.y; @@ -195,11 +196,11 @@ class CDirQuantCacheBase return *this; } - inline core::vectorSIMDu32 getValue() const + hlsl::uint32_t4 getValue() const /* returns the four stored 16-bit components as unsigned 32-bit lanes, matching the type accepted by operator= */ { - return core::vectorSIMDu32(x,y,z,w); + return { x, y, z, w }; } - + private: uint16_t x; uint16_t y; @@ -377,11 +378,30 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: std::tuple...> cache; template - value_type_t quantize(const core::vectorSIMDf& value) + value_type_t quantize(const hlsl::vector& value) { - const auto negativeMask = value < core::vectorSIMDf(0.0f); + using float32_tN = hlsl::vector; + + auto to_vec_t4 = [](hlsl::vector src, T padValue) -> hlsl::vector + { + if constexpr(dimensions == 1) + { + return {src.x, padValue, padValue, padValue}; + } else if constexpr (dimensions == 2) + { + return {src.x, src.y, padValue, padValue}; + } else if constexpr (dimensions == 3) + { + return {src.x, src.y, src.z, padValue}; + } else if constexpr (dimensions == 4) + { + return {src.x, src.y, src.z, src.w}; + } + }; + + const auto negativeMask = to_vec_t4(lessThan(value, float32_tN(0.0f)), false); - const core::vectorSIMDf absValue = abs(value); + const float32_tN absValue = abs(value); const auto key = Key(absValue); constexpr auto quantizationBits = quantization_bits_v; @@ -393,32 +413,50 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: quantized = found->second; else { - const core::vectorSIMDf fit = findBestFit(absValue); + 
const auto fit = findBestFit(absValue); + + const auto abs_fit = to_vec_t4(abs(fit), 0.f); + quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w); - quantized = core::vectorSIMDu32(core::abs(fit)); insertIntoCache(key,quantized); } } - const core::vectorSIMDu32 xorflag((0x1u<<(quantizationBits+1u))-1u); - auto restoredAsVec = quantized.getValue()^core::mix(core::vectorSIMDu32(0u),xorflag,negativeMask); - restoredAsVec += core::mix(core::vectorSIMDu32(0u),core::vectorSIMDu32(1u),negativeMask); - return value_type_t(restoredAsVec&xorflag); + auto select = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask) + { + hlsl::uint32_t4 retval; + retval.x = mask.x ? val2.x : val1.x; + retval.y = mask.y ? val2.y : val1.y; + retval.z = mask.z ? val2.z : val1.z; + retval.w = mask.w ? val2.w : val1.w; + return retval; + }; +; + // create all one bits + const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u); + + // for positive number xoring with 0 keep its value + // for negative number we xor with all one which will flip the bits, then we add one later. 
Flipping the bits then adding one will turn positive number into negative number + auto restoredAsVec = quantized.getValue() ^ select(hlsl::uint32_t4(0u), hlsl::uint32_t4(xorflag), negativeMask); + restoredAsVec += hlsl::uint32_t4(negativeMask); + + return value_type_t(restoredAsVec); } template - static inline core::vectorSIMDf findBestFit(const core::vectorSIMDf& value) + static inline hlsl::vector findBestFit(const hlsl::vector& value) { + using float32_tN = hlsl::vector; static_assert(dimensions>1u,"No point"); static_assert(dimensions<=4u,"High Dimensions are Hard!"); - // precise normalize - const auto vectorForDots = value.preciseDivision(length(value)); + + const auto vectorForDots = hlsl::normalize(value); // - core::vectorSIMDf fittingVector; - core::vectorSIMDf floorOffset; + float32_tN fittingVector; + float32_tN floorOffset = {}; constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u; - core::vectorSIMDf corners[cornerCount] = {}; + float32_tN corners[cornerCount] = {}; { uint32_t maxDirCompIndex = 0u; for (auto i=1u; i void + auto evaluateFit = [&](const float32_tN& newFit) -> void { - auto newFitLen = core::length(newFit); - const float dp = core::dot(newFit,vectorForDots).preciseDivision(newFitLen)[0]; + auto newFitLen = length(newFit); + const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen); if (dp > closestTo1) { closestTo1 = dp; @@ -466,18 +504,18 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl:: }; constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u; - const core::vectorSIMDf cubeHalfSizeND = core::vectorSIMDf(cubeHalfSize); + const float32_tN cubeHalfSizeND = hlsl::promote(cubeHalfSize); for (uint32_t n=cubeHalfSize; n>0u; n--) { //we'd use float addition in the interest of speed, to increment the loop //but adding a small number to a large one loses precision, so multiplication preferrable - core::vectorSIMDf bottomFit = core::floor(fittingVector*float(n)+floorOffset); - if 
((bottomFit<=cubeHalfSizeND).all()) + const auto bottomFit = glm::floor(fittingVector * float(n) + floorOffset); + if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND))) evaluateFit(bottomFit); - for (auto i=0u; i createArrow(const uint32_t tesselationCylinder = 4, - const uint32_t tesselationCone = 8, const float height = 1.f, + core::smart_refctd_ptr createArrow(const uint16_t tesselationCylinder = 4, + const uint16_t tesselationCone = 8, const float height = 1.f, const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f, - const float widthCone = 0.3f, const video::SColor colorCylinder = 0xFFFFFFFF, - const video::SColor colorCone = 0xFFFFFFFF) const; + const float widthCone = 0.3f) const; //! Create a sphere mesh. @@ -86,8 +87,8 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \return Generated mesh. */ core::smart_refctd_ptr createCylinder(float radius, float length, - uint32_t tesselation, - const video::SColor& color=video::SColor(0xffffffff), CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; + uint16_t tesselation, + CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; //! Create a cone mesh. /** @@ -99,9 +100,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted \param oblique (to be documented) \return Generated mesh. 
*/ - core::smart_refctd_ptr createCone(float radius, float length, uint32_t tesselation, - const video::SColor& colorTop=video::SColor(0xffffffff), - const video::SColor& colorBottom=video::SColor(0xffffffff), + core::smart_refctd_ptr createCone(float radius, float length, uint16_t tesselation, float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const; core::smart_refctd_ptr createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const; diff --git a/include/nbl/asset/utils/CQuantNormalCache.h b/include/nbl/asset/utils/CQuantNormalCache.h index 92703d9d37..31b7d403d2 100644 --- a/include/nbl/asset/utils/CQuantNormalCache.h +++ b/include/nbl/asset/utils/CQuantNormalCache.h @@ -19,7 +19,7 @@ namespace impl struct VectorUV { - inline VectorUV(const core::vectorSIMDf& absNormal) + inline VectorUV(const hlsl::float32_t3& absNormal) { const float rcpManhattanNorm = 1.f / (absNormal.x + absNormal.y + absNormal.z); u = absNormal.x * rcpManhattanNorm; @@ -56,9 +56,8 @@ class CQuantNormalCache : public CDirQuantCacheBase - value_type_t quantize(core::vectorSIMDf normal) + value_type_t quantize(hlsl::float32_t3 normal) { - normal.makeSafe3D(); return Base::quantize<3u,CacheFormat>(normal); } }; diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl index f92dddfb26..8bfed025ce 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl @@ -348,8 +348,14 @@ requires concepts::IntegralScalar struct find_lsb_helper { using return_t = int32_t; - static inline T __call(const T arg) + NBL_CONSTEXPR_FUNC static inline T __call(const T arg) { + if (std::is_constant_evaluated()) /* plain `if`: under `if constexpr` this condition is always true and the glm path below would never run */ + { + for (T ix = T(0); ix < sizeof(T) * 8; ix++) /* scan only the bits T has, so the shift never reaches T's width */ + if ((T(1) << ix) & arg) return ix; + return ~T(0); + } return glm::findLSB(arg); } }; @@ -369,7 +375,7 @@ requires std::is_enum_v struct find_lsb_helper 
{ using return_t = int32_t; - static int32_t __call(NBL_CONST_REF_ARG(EnumType) val) + NBL_CONSTEXPR_FUNC static int32_t __call(NBL_CONST_REF_ARG(EnumType) val) { using underlying_t = std::underlying_type_t; return find_lsb_helper::__call(static_cast(val)); diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl index a5747a5fb7..7198bae563 100644 --- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl +++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl @@ -68,7 +68,7 @@ inline typename matrix_traits::scalar_type determinant(NBL_CONST_REF_ARG } template -inline typename cpp_compat_intrinsics_impl::find_lsb_helper::return_t findLSB(NBL_CONST_REF_ARG(T) val) +NBL_CONSTEXPR_FUNC inline typename cpp_compat_intrinsics_impl::find_lsb_helper::return_t findLSB(NBL_CONST_REF_ARG(T) val) { return cpp_compat_intrinsics_impl::find_lsb_helper::__call(val); } diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl new file mode 100644 index 0000000000..943bd313f0 --- /dev/null +++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl @@ -0,0 +1,57 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORM_INCLUDED_ +#define _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORM_INCLUDED_ + + +#include +#include +#include + + +namespace nbl +{ +namespace hlsl +{ +namespace math +{ +namespace linalg +{ + +/// Builds a rotation 4 * 4 matrix created from an axis vector and an angle. +/// +/// @param angle Rotation angle expressed in radians. +/// @param axis Rotation axis, must be normalized. 
+/// +/// @tparam T A floating-point scalar type +template +matrix rotation_mat(T angle, vector const& axis) +{ + T const a = angle; + T const c = cos(a); + T const s = sin(a); + + vector temp((T(1) - c) * axis); + + matrix rotation; + rotation[0][0] = c + temp[0] * axis[0]; + rotation[0][1] = temp[1] * axis[0] - s * axis[2]; + rotation[0][2] = temp[2] * axis[0] + s * axis[1]; + + rotation[1][0] = temp[0] * axis[1] + s * axis[2]; + rotation[1][1] = c + temp[1] * axis[1]; + rotation[1][2] = temp[2] * axis[1] - s * axis[0]; + + rotation[2][0] = temp[0] * axis[2] - s * axis[1]; + rotation[2][1] = temp[1] * axis[2] + s * axis[0]; + rotation[2][2] = c + temp[2] * axis[2]; + + return rotation; +} + +} +} +} +} +#endif diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl index 3a49450d7c..41f56e225e 100644 --- a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl +++ b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl @@ -46,11 +46,21 @@ bool rayQueryProceedKHR([[vk::ext_reference]] RayQueryKHR query); [[vk::ext_instruction(spv::OpRayQueryGetIntersectionTypeKHR)]] int rayQueryGetIntersectionTypeKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); +[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] +[[vk::ext_extension("SPV_KHR_ray_query")]] +[[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceCustomIndexKHR)]] +int rayQueryGetIntersectionInstanceCustomIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); + [[vk::ext_capability(spv::CapabilityRayQueryKHR)]] [[vk::ext_extension("SPV_KHR_ray_query")]] [[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceIdKHR)]] int rayQueryGetIntersectionInstanceIdKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); +[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] +[[vk::ext_extension("SPV_KHR_ray_query")]] +[[vk::ext_instruction(spv::OpRayQueryGetIntersectionGeometryIndexKHR)]] +int 
rayQueryGetIntersectionGeometryIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); + [[vk::ext_capability(spv::CapabilityRayQueryKHR)]] [[vk::ext_extension("SPV_KHR_ray_query")]] [[vk::ext_instruction(spv::OpRayQueryGetIntersectionPrimitiveIndexKHR)]] @@ -61,6 +71,11 @@ int rayQueryGetIntersectionPrimitiveIndexKHR([[vk::ext_reference]] RayQueryKHR q [[vk::ext_instruction(spv::OpRayQueryGetIntersectionBarycentricsKHR)]] float2 rayQueryGetIntersectionBarycentricsKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); +[[vk::ext_capability(spv::CapabilityRayQueryKHR)]] +[[vk::ext_extension("SPV_KHR_ray_query")]] +[[vk::ext_instruction(spv::OpRayQueryGetIntersectionFrontFaceKHR)]] +bool rayQueryGetIntersectionFrontFaceKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); /* the instruction's result type is a boolean scalar */ + // position fetch for ray tracing uses gl_HitTriangleVertexPositionsEXT -> HitTriangleVertexPositionsKHR decorated OpVariable [[vk::ext_builtin_input(spv::BuiltInHitTriangleVertexPositionsKHR)]] static const float32_t3 HitTriangleVertexPositionsKHR[3]; diff --git a/include/nbl/core/util/bitflag.h b/include/nbl/core/util/bitflag.h index 1731c0cac3..62bec57d49 100644 --- a/include/nbl/core/util/bitflag.h +++ b/include/nbl/core/util/bitflag.h @@ -60,7 +60,7 @@ namespace nbl::hlsl::cpp_compat_intrinsics_impl struct find_lsb_helper> { using return_t = int32_t; - static return_t __call(NBL_CONST_REF_ARG(core::bitflag) val) + NBL_CONSTEXPR_FUNC static return_t __call(NBL_CONST_REF_ARG(core::bitflag) val) { return find_lsb_helper::__call(val.value); } diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt index e7193e3eaf..935beffe2c 100755 --- a/src/nbl/CMakeLists.txt +++ b/src/nbl/CMakeLists.txt @@ -151,6 +151,7 @@ set(NBL_UI_SOURCES ) set(NBL_ASSET_SOURCES # Assets + asset/ECommonEnums.cpp asset/IAsset.cpp asset/IRenderpass.cpp asset/IAssetManager.cpp diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp new file mode 100644 index 
0000000000..2366b25f99 --- /dev/null +++ b/src/nbl/asset/ECommonEnums.cpp @@ -0,0 +1,268 @@ +#include "nbl/asset/ECommonEnums.h" + +namespace nbl::asset +{ + +core::bitflag allPreviousStages(core::bitflag stages) +{ + struct PerStagePreviousStages + { + public: + constexpr PerStagePreviousStages() + { + // set all stage to have itself as their previous stages + for (auto i = 0; i < std::numeric_limits::digits; i++) + data[i] = static_cast(i); + + add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + // graphics primitive pipeline + PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT; + for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT}) + { + if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) + primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT; + add(pipelineStage, primitivePrevStage); + primitivePrevStage |= pipelineStage; + } + + + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags) + { + const auto bitIx = hlsl::findLSB(static_cast(stageFlag)); + data[bitIx] |= previousStageFlags; + } + + PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; + }; + + constexpr PerStagePreviousStages 
bitToAccess = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = hlsl::findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allLaterStages(core::bitflag stages) +{ + struct PerStageLaterStages + { + public: + constexpr PerStageLaterStages() + { + // set all stage to have itself as their next stages + for (auto i = 0; i < std::numeric_limits::digits; i++) + data[i] = static_cast(i); + + add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT); + add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); + + // graphics primitive pipeline + PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE; + const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT }; + for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) + { + const auto pipelineStage = *iter; + add(pipelineStage, laterStage); + laterStage |= pipelineStage; + } + + add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags) + { + const auto bitIx = 
hlsl::findLSB(static_cast(stageFlag)); + data[bitIx] |= laterStageFlags; + } + + PIPELINE_STAGE_FLAGS data[std::numeric_limits>::digits] = {}; + }; + + constexpr PerStageLaterStages bitToAccess = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = hlsl::findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allAccessesFromStages(core::bitflag stages) +{ + struct PerStageAccesses + { + public: + constexpr PerStageAccesses() + { + init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT); + + constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW); + init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT); + + constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT; +// init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly? 
+ + constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; + init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW); + init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW); + + init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT); + init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT); + init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT); + + constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS; + constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); + init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT); + init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT); + init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW); +// init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW); +// init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW); + init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT); + init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT); + constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + 
init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW); + init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT); + init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW); + init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT); + + init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); + + init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW); + init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW); + +// init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT); +// init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT); +// init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + + constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags) + { + const auto bitIx = hlsl::findLSB(static_cast(stageFlag)); + data[bitIx] = accessFlags; + } + + ACCESS_FLAGS data[32] = {}; + }; + constexpr PerStageAccesses bitToAccess = {}; + + // TODO: add logically later or previous stages to make sure all other accesses remain valid + // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically) + + core::bitflag retval = ACCESS_FLAGS::NONE; + while (bool(stages.value)) + { + const auto bitIx = hlsl::findLSB(static_cast(stages.value)); + retval |= bitToAccess[bitIx]; + stages ^= static_cast(0x1u< allStagesFromAccesses(core::bitflag accesses) +{ + struct PerAccessStages + { + public: + constexpr PerAccessStages() + { + init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); + 
init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT); + + init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); + init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); + + constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT; +// init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds); +// init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); + + constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT; + constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; + init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders); + init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations); + + init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); + init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT); + init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT); + init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT); + + init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders); + init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT); + init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds); + init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders); + + init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT); + 
init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT); + + init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT); + init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT); + constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT; + init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests); + init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests); + init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT); + init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); + init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT); + + init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT); + +// init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); +// init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT); +// init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); +// init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT); +// init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); +// init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT); + } + constexpr const auto& operator[](const size_t ix) const {return data[ix];} + + private: + constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags) + { + const auto bitIx = hlsl::findLSB(static_cast(accessFlags)); + data[bitIx] = stageFlags; + } + + PIPELINE_STAGE_FLAGS data[32] = {}; + }; + constexpr PerAccessStages bitToStage = {}; + + core::bitflag retval = PIPELINE_STAGE_FLAGS::NONE; + while (bool(accesses.value)) + { + const auto bitIx = 
hlsl::findLSB(static_cast(accesses.value)); + retval |= bitToStage[bitIx]; + accesses ^= static_cast(0x1u< -#include #include #include - namespace nbl::asset { +namespace +{ +using snorm_normal_t = hlsl::vector; +constexpr int8_t snorm_one = std::numeric_limits::max(); +constexpr int8_t snorm_neg_one = -snorm_one; +constexpr auto snorm_positive_x = hlsl::vector(snorm_one, 0, 0, 0); +constexpr auto snorm_negative_x = hlsl::vector(snorm_neg_one, 0, 0, 0); +constexpr auto snorm_positive_y = hlsl::vector(0, snorm_one, 0, 0); +constexpr auto snorm_negative_y = hlsl::vector(0, snorm_neg_one, 0, 0); +constexpr auto snorm_positive_z = hlsl::vector(0, 0, snorm_one, 0); +constexpr auto snorm_negative_z = hlsl::vector(0, 0, snorm_neg_one, 0); + +constexpr auto snorm_all_ones = hlsl::vector(snorm_one, snorm_one, snorm_one, snorm_one); + +template + requires(std::is_same_v || std::is_same_v) +constexpr E_FORMAT get_uv_format() +{ + if constexpr(std::is_same_v) + { + return EF_R8G8_UNORM; + } else + { + return EF_R16G16_UNORM; + } +} +} + +template + requires(std::is_same_v || std::is_same_v) +static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount) +{ + const auto elementCount = 2; + const auto attrSize = sizeof(ElementT) * elementCount; + auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}}); + hlsl::shapes::AABB<4, ElementT> aabb; + aabb.minVx = hlsl::vector(0,0,0,0); + aabb.maxVx = hlsl::vector(std::numeric_limits::max(), std::numeric_limits::max(), 0, 0); + + auto retval = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = attrSize, + }, + .src = { + .offset = 0, + .size = buff->getSize(), + .buffer = std::move(buff), + } + }; + + if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u8 = aabb; + retval.composed.format = get_uv_format(); + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM; + } + else if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u16 = aabb; + 
retval.composed.format = get_uv_format(); + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM; + } + + return retval; +} + +template + requires(std::is_same_v || std::is_same_v) +static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex) +{ + + const auto bytesize = sizeof(IndexT) * indexCount; + auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); + + hlsl::shapes::AABB<4,IndexT> aabb; + aabb.minVx[0] = 0; + aabb.maxVx[0] = maxIndex; + + auto retval = ICPUPolygonGeometry::SDataView{ + .composed = { + .stride = sizeof(IndexT), + }, + .src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)}, + }; + + if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u16 = aabb; + retval.composed.format = EF_R16_UINT; + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16; + } + else if constexpr(std::is_same_v) + { + retval.composed.encodedDataRange.u32 = aabb; + retval.composed.format = EF_R32_UINT; + retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U32; + } + + return retval; +} + +template + requires(ElementCountV > 0 && ElementCountV <= 4) +static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, const hlsl::shapes::AABB<4, hlsl::float32_t>& aabb) +{ + using position_t = hlsl::vector; + constexpr auto AttrSize = sizeof(position_t); + auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE}); + + constexpr auto format = []() + { + if constexpr (ElementCountV == 1) return EF_R32_SFLOAT; + if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT; + if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT; + if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT; + }(); + + return { + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = format, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)} + }; +} + 
+static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, const hlsl::shapes::AABB<4, int8_t>& aabb) +{ + constexpr auto AttrSize = sizeof(snorm_normal_t); + auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE}); + return { + .composed = { + .encodedDataRange = {.s8=aabb}, + .stride = AttrSize, + .format = EF_R8G8B8A8_SNORM, + .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM + }, + .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} + }; +} + +static void encodeUv(hlsl::vector* uvDst, hlsl::float32_t2 uvSrc) +{ + uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc); + memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2); +} + core::smart_refctd_ptr CGeometryCreator::createCube(const hlsl::float32_t3 size) const { using namespace hlsl; @@ -22,13 +169,15 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h auto retval = core::make_smart_refctd_ptr(); retval->setIndexing(IPolygonGeometryBase::TriangleList()); + constexpr auto CubeUniqueVertices = 24; + // Create indices using index_t = uint16_t; { - constexpr auto IndexCount = 36u; - constexpr auto bytesize = sizeof(index_t) * IndexCount; - auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT}); - auto u = reinterpret_cast(indices->getPointer()); + constexpr auto IndexCount = 36; + constexpr auto MaxIndex = CubeUniqueVertices - 1; + auto indexView = createIndexView(IndexCount, MaxIndex); + auto u = reinterpret_cast(indexView.src.buffer->getPointer()); for (uint32_t i=0u; i<6u; ++i) { u[i*6+0] = 4*i+0; @@ -38,84 +187,42 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h u[i*6+4] = 4*i+2; u[i*6+5] = 4*i+3; } - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = 23; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=bytesize,.buffer=std::move(indices)} - }); + 
retval->setIndexView(std::move(indexView)); } - constexpr auto CubeUniqueVertices = 24; // Create vertex attributes with NONE usage because we have no clue how they'll be used hlsl::float32_t3* positions; + // for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats - hlsl::vector* normals; - hlsl::vector* uvs; + snorm_normal_t* normals; + + using uv_element_t = uint8_t; + constexpr auto UnityUV = std::numeric_limits::max(); + hlsl::vector* uvs; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4,float32_t> aabb; aabb.maxVx = float32_t4(size*0.5f,0.f); - aabb.minVx = -aabb.maxVx; - retval->visitAABB([aabb](auto& ref)->void - { - ref.minVx = hlsl::trunc(aabb.minVx); - ref.maxVx = hlsl::trunc(aabb.maxVx); - } - ); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32=aabb}, - .stride = AttrSize, - .format = EF_R32G32B32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)} - }); + aabb.minVx = - aabb.maxVx; + + auto positionView = createPositionView(CubeUniqueVertices, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { - constexpr auto AttrSize = sizeof(decltype(*normals)); - auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4,int8_t> aabb; - aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.maxVx = snorm_all_ones; aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto normalView = 
createSnormNormalView(CubeUniqueVertices, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } + { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4,uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = AttrSize, - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto uvView = createUvView(CubeUniqueVertices); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -160,30 +267,31 @@ core::smart_refctd_ptr CGeometryCreator::createCube(const h // { - const hlsl::vector norm[6] = + const snorm_normal_t norm[6] = { - hlsl::vector(0, 0, 127), - hlsl::vector(127, 0, 0), - hlsl::vector(0, 0,-127), - hlsl::vector(-127, 0, 0), - hlsl::vector(0, 127, 0), - hlsl::vector(0,-127, 0) + snorm_positive_z, + snorm_positive_x, + snorm_negative_z, + snorm_negative_x, + snorm_positive_y, + snorm_negative_y }; - const hlsl::vector uv[4] = + const hlsl::vector uv[4] = { - hlsl::vector( 0,255), - hlsl::vector(255,255), - hlsl::vector(255, 0), - hlsl::vector( 0, 0) + hlsl::vector( 0, UnityUV), + hlsl::vector(UnityUV, UnityUV), + hlsl::vector(UnityUV, 0), + hlsl::vector( 0, 0) }; - for (size_t f=0ull; f<6ull; ++f) + + for (size_t f = 0ull; f < 6ull; ++f) { - const size_t v = f*4ull; + const size_t v = f * 4ull; - for (size_t i=0ull; i<4ull; ++i) + for (size_t i = 0ull; i < 4ull; ++i) { - normals[v+i] = vector(norm[f],0); - uvs[v+i] = uv[i]; + normals[v + i] = snorm_normal_t(norm[f]); + uvs[v + i] = uv[i]; } } } @@ -192,121 +300,12 @@ 
core::smart_refctd_ptr CGeometryCreator::createCube(const h return retval; } -#if 0 - -/* - a cylinder, a cone and a cross - point up on (0,1.f, 0.f ) -*/ -core::smart_refctd_ptr CGeometryCreator::createArrow( - const uint32_t tesselationCylinder, - const uint32_t tesselationCone, - const float height, - const float cylinderHeight, - const float width0, - const float width1, - const video::SColor vtxColor0, - const video::SColor vtxColor1 -) const +core::smart_refctd_ptr CGeometryCreator::createSphere(float radius, + uint32_t polyCountX, uint32_t polyCountY, CQuantNormalCache* const quantNormalCacheOverride) const { - assert(height > cylinderHeight); - - auto cylinder = createCylinderMesh(width0, cylinderHeight, tesselationCylinder, vtxColor0); - auto cone = createConeMesh(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1); - - auto cylinderVertices = reinterpret_cast(cylinder.bindings[0].buffer->getPointer()); - auto coneVertices = reinterpret_cast(cone.bindings[0].buffer->getPointer()); - - auto cylinderIndecies = reinterpret_cast(cylinder.indexBuffer.buffer->getPointer()); - auto coneIndecies = reinterpret_cast(cone.indexBuffer.buffer->getPointer()); - - const auto cylinderVertexCount = cylinder.bindings[0].buffer->getSize() / sizeof(CylinderVertex); - const auto coneVertexCount = cone.bindings[0].buffer->getSize() / sizeof(ConeVertex); - const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount; - - const auto cylinderIndexCount = cylinder.indexBuffer.buffer->getSize() / sizeof(uint16_t); - const auto coneIndexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t); - const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount; - - for (auto i = 0ull; i < coneVertexCount; ++i) - { - core::vector3df_SIMD newPos = coneVertices[i].pos; - newPos.rotateYZByRAD(-1.5707963268); - - for (auto c = 0; c < 3; ++c) - coneVertices[i].pos[c] = newPos[c]; - } - - auto newArrowVertexBuffer = asset::ICPUBuffer::create({ 
newArrowVertexCount * sizeof(ArrowVertex) }); - newArrowVertexBuffer->setUsageFlags(newArrowVertexBuffer->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - auto newArrowIndexBuffer = asset::ICPUBuffer::create({ newArrowIndexCount * sizeof(uint16_t) }); - newArrowIndexBuffer->setUsageFlags(newArrowIndexBuffer->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - - for (auto z = 0ull; z < newArrowVertexCount; ++z) - { - auto arrowVertex = reinterpret_cast(newArrowVertexBuffer->getPointer()) + z; - - if (z < cylinderVertexCount) - { - auto cylinderVertex = (cylinderVertices + z); - memcpy(arrowVertex, cylinderVertex, sizeof(ArrowVertex)); - } - else - { - auto coneVertex = (coneVertices + z - cylinderVertexCount); - memcpy(arrowVertex, coneVertex, offsetof(ConeVertex, normal)); // copy position and color - arrowVertex->uv[0] = 0; - arrowVertex->uv[1] = 0; - arrowVertex->normal = coneVertex->normal; - } - } - - { - auto ArrowIndices = reinterpret_cast(newArrowIndexBuffer->getPointer()); - auto newConeIndices = (ArrowIndices + cylinderIndexCount); - - memcpy(ArrowIndices, cylinderIndecies, sizeof(uint16_t) * cylinderIndexCount); - memcpy(newConeIndices, coneIndecies, sizeof(uint16_t) * coneIndexCount); - - for (auto i = 0ull; i < coneIndexCount; ++i) - *(newConeIndices + i) += cylinderVertexCount; - } - - return_type arrow; - - constexpr size_t vertexSize = sizeof(ArrowVertex); - arrow.inputParams = - { 0b1111u,0b1u, - { - {0u,EF_R32G32B32_SFLOAT,offsetof(ArrowVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(ArrowVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(ArrowVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ArrowVertex,normal)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; - - arrow.bindings[0] = { 0, std::move(newArrowVertexBuffer) }; - arrow.indexBuffer = { 0, std::move(newArrowIndexBuffer) }; - arrow.indexCount = newArrowIndexCount; - arrow.indexType = EIT_16BIT; - - return arrow; -} + using namespace hlsl; -/* A 
sphere with proper normals and texture coords */ -core::smart_refctd_ptr CGeometryCreator::createSphere(float radius, uint32_t polyCountX, uint32_t polyCountY, IMeshManipulator* const meshManipulatorOverride) const -{ - // we are creating the sphere mesh here. - return_type retval; - constexpr size_t vertexSize = sizeof(CGeometryCreator::SphereVertex); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); - retval.inputParams = { 0b1111u,0b1u,{ - {0u,EF_R32G32B32_SFLOAT,offsetof(SphereVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(SphereVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(SphereVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(SphereVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; if (polyCountX < 2) polyCountX = 2; @@ -314,15 +313,21 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float polyCountY = 2; const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level + const size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - retval.indexCount = (polyCountX * polyCountY) * 6; - auto indices = asset::ICPUBuffer::create({ sizeof(uint32_t) * retval.indexCount }); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); // Create indices { + using index_t = uint32_t; + + const auto indexCount = (polyCountX * polyCountY) * 6; + auto indexView = createIndexView(indexCount, vertexCount - 1); + auto indexPtr = reinterpret_cast(indexView.src.buffer->getPointer()); + uint32_t level = 0; size_t indexAddIx = 0; - uint32_t* indexPtr = (uint32_t*)indices->getPointer(); for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1) { //main quads, top to bottom @@ -378,23 +383,48 @@ 
core::smart_refctd_ptr CGeometryCreator::createSphere(float indexPtr[indexAddIx++] = polyCountSqM1 + polyCountX - 1; indexPtr[indexAddIx++] = polyCountSqM1; indexPtr[indexAddIx++] = polyCountSq1; + + retval->setIndexView(std::move(indexView)); + } - indices->setUsageFlags(indices->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - retval.indexBuffer = {0ull, std::move(indices)}; - // handle vertices + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + + snorm_normal_t* normals; + + using uv_element_t = uint16_t; + constexpr auto UnityUV = std::numeric_limits::max(); + + hlsl::vector* uvs; { - size_t vertexSize = 3 * 4 + 4 + 2 * 4 + 4; - size_t vertexCount = (polyCountXPitch * polyCountY) + 2; - auto vtxBuf = asset::ICPUBuffer::create({ vertexCount * vertexSize }); - auto* tmpMem = reinterpret_cast(vtxBuf->getPointer()); - for (size_t i = 0; i < vertexCount; i++) { - tmpMem[i * vertexSize + 3 * 4 + 0] = 255; - tmpMem[i * vertexSize + 3 * 4 + 1] = 255; - tmpMem[i * vertexSize + 3 * 4 + 2] = 255; - tmpMem[i * vertexSize + 3 * 4 + 3] = 255; + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, radius, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f); + auto positionView = createPositionView(vertexCount, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); + } + { + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = snorm_all_ones; + aabb.minVx = -aabb.maxVx; + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } + { + auto uvView = createUvView(vertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); + } + } + + // fill vertices 
+ { // calculate the angle which separates all points in a circle const float AngleX = 2 * core::PI() / polyCountX; const float AngleY = core::PI() / polyCountY; @@ -404,9 +434,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float // we don't start at 0. double ay = 0;//AngleY / 2; - - using quant_normal_t = CQuantNormalCache::value_type_t; - uint8_t* tmpMemPtr = tmpMem; + auto vertex_i = 0; for (uint32_t y = 0; y < polyCountY; ++y) { ay += AngleY; @@ -414,7 +442,7 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float axz = 0; // calculate the necessary vertices without the doubled one - uint8_t* oldTmpMemPtr = tmpMemPtr; + const auto old_vertex_i = vertex_i; for (uint32_t xz = 0; xz < polyCountX; ++xz) { // calculate points position @@ -423,9 +451,8 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float static_cast(cos(ay)), static_cast(sin(axz) * sinay)); // for spheres the normal is the position - core::vectorSIMDf normal(&pos.X); - normal.makeSafe3D(); - quant_normal_t quantizedNormal = quantNormalCache->quantize(normal); + const auto normal = pos; + const auto quantizedNormal = quantNormalCache->quantize(normal); pos *= radius; // calculate texture coordinates via sphere mapping @@ -433,229 +460,235 @@ core::smart_refctd_ptr CGeometryCreator::createSphere(float float tu = 0.5f; //if (y==0) //{ - if (normal.Y != -1.0f && normal.Y != 1.0f) - tu = static_cast(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI()); - if (normal.Z < 0.0f) + if (normal.y != -1.0f && normal.y != 1.0f) + tu = static_cast(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi); + if (normal.z < 0.0f) tu = 1 - tu; //} //else //tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4]; - ((float*)tmpMemPtr)[0] = pos.X; - ((float*)tmpMemPtr)[1] = pos.Y; - ((float*)tmpMemPtr)[2] = pos.Z; - ((float*)tmpMemPtr)[4] = tu; - ((float*)tmpMemPtr)[5] = static_cast(ay * core::RECIPROCAL_PI()); - ((quant_normal_t*)tmpMemPtr)[6] = 
quantizedNormal; - static_assert(sizeof(quant_normal_t)==4u); + positions[vertex_i] = pos; + encodeUv(uvs + vertex_i, float32_t2(tu, static_cast(ay* numbers::inv_pi))); + memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal)); - tmpMemPtr += vertexSize; + vertex_i++; axz += AngleX; } // This is the doubled vertex on the initial position - ((float*)tmpMemPtr)[0] = ((float*)oldTmpMemPtr)[0]; - ((float*)tmpMemPtr)[1] = ((float*)oldTmpMemPtr)[1]; - ((float*)tmpMemPtr)[2] = ((float*)oldTmpMemPtr)[2]; - ((float*)tmpMemPtr)[4] = 1.f; - ((float*)tmpMemPtr)[5] = ((float*)oldTmpMemPtr)[5]; - ((uint32_t*)tmpMemPtr)[6] = ((uint32_t*)oldTmpMemPtr)[6]; - tmpMemPtr += vertexSize; + positions[vertex_i] = positions[old_vertex_i]; + uvs[vertex_i] = { UnityUV, uvs[old_vertex_i].y }; + normals[vertex_i] = normals[old_vertex_i]; + + vertex_i++; } // the vertex at the top of the sphere - ((float*)tmpMemPtr)[0] = 0.f; - ((float*)tmpMemPtr)[1] = radius; - ((float*)tmpMemPtr)[2] = 0.f; - ((float*)tmpMemPtr)[4] = 0.5f; - ((float*)tmpMemPtr)[5] = 0.f; - ((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize(core::vectorSIMDf(0.f, 1.f, 0.f)); + positions[vertex_i] = { 0.f, radius, 0.f }; + uvs[vertex_i] = { 0, UnityUV / 2}; + const auto quantizedTopNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, 1.f, 0.f)); + memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); // the vertex at the bottom of the sphere - tmpMemPtr += vertexSize; - ((float*)tmpMemPtr)[0] = 0.f; - ((float*)tmpMemPtr)[1] = -radius; - ((float*)tmpMemPtr)[2] = 0.f; - ((float*)tmpMemPtr)[4] = 0.5f; - ((float*)tmpMemPtr)[5] = 1.f; - ((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize(core::vectorSIMDf(0.f, -1.f, 0.f)); - - // recalculate bounding box - core::aabbox3df BoundingBox; - BoundingBox.reset(float32_t3(radius)); - BoundingBox.addInternalPoint(-radius, -radius, -radius); - - // set vertex buffer - vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | 
asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - retval.bindings[0] = { 0ull,std::move(vtxBuf) }; - retval.indexType = asset::EIT_32BIT; - retval.bbox = BoundingBox; + vertex_i++; + positions[vertex_i] = { 0.f, -radius, 0.f }; + uvs[vertex_i] = { UnityUV / 2, UnityUV}; + const auto quantizedBottomNormal = quantNormalCache->quantize(hlsl::float32_t3(0.f, -1.f, 0.f)); + memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal)); } + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -/* A cylinder with proper normals and texture coords */ core::smart_refctd_ptr CGeometryCreator::createCylinder( float radius, float length, - uint32_t tesselation, const video::SColor& color, IMeshManipulator* const meshManipulatorOverride -) const + uint16_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const { - return_type retval; - constexpr size_t vertexSize = sizeof(CGeometryCreator::CylinderVertex); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? 
defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); - retval.inputParams = { 0b1111u,0b1u,{ - {0u,EF_R32G32B32_SFLOAT,offsetof(CylinderVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(CylinderVertex,color)}, - {0u,EF_R32G32_SFLOAT,offsetof(CylinderVertex,uv)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(CylinderVertex,normal)} - },{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} }; - - const size_t vtxCnt = 2u*tesselation; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt*sizeof(CylinderVertex) }); - - CylinderVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); - for (auto i=0ull; i(tesselation); - const float step = 2.f*core::PI()*tesselationRec; - for (uint32_t i = 0u; iquantize(core::normalize(p)); - - memcpy(vertices[i].pos, p.pointer, 12u); - vertices[i].normal = n; - memcpy(vertices[i].color, glcolor, 4u); - vertices[i].uv[0] = float(i) * tesselationRec; - - vertices[i+halfIx] = vertices[i]; - vertices[i+halfIx].pos[2] = length; - vertices[i+halfIx].uv[1] = 1.f; - } - - constexpr uint32_t rows = 2u; - retval.indexCount = rows * 3u * tesselation; - auto idxBuf = asset::ICPUBuffer::create({ retval.indexCount *sizeof(uint16_t) }); - uint16_t* indices = (uint16_t*)idxBuf->getPointer(); - - for (uint32_t i = 0u, j = 0u; i < halfIx; ++i) - { - indices[j++] = i; - indices[j++] = (i+1u)!=halfIx ? (i+1u):0u; - indices[j++] = i+halfIx; - indices[j++] = i+halfIx; - indices[j++] = (i+1u)!=halfIx ? (i+1u):0u; - indices[j++] = (i+1u)!=halfIx ? 
(i+1u+halfIx):halfIx; - } - - // set vertex buffer - idxBuf->setUsageFlags(idxBuf->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT); - retval.indexBuffer = { 0ull, std::move(idxBuf) }; - vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - retval.bindings[0] = { 0ull, std::move(vtxBuf) }; - retval.indexType = asset::EIT_16BIT; - //retval.bbox = ?; + using namespace hlsl; + + CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride; + + const auto halfIx = tesselation; + const uint32_t u32_vertexCount = 2 * tesselation; + if (u32_vertexCount > std::numeric_limits::max()) + return nullptr; + const auto vertexCount = static_cast(u32_vertexCount); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + // Create indices + using index_t = uint16_t; + { + constexpr uint32_t RowCount = 2u; + const auto IndexCount = RowCount * 3 * tesselation; + auto indexView = createIndexView(IndexCount, vertexCount - 1); + auto u = reinterpret_cast(indexView.src.buffer->getPointer()); + + for (uint16_t i = 0u, j = 0u; i < halfIx; ++i) + { + u[j++] = i; + u[j++] = (i + 1u) != halfIx ? (i + 1u):0u; + u[j++] = i + halfIx; + u[j++] = i + halfIx; + u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u; + u[j++] = (i + 1u)!= halfIx ? 
(i + 1u + halfIx) : halfIx; + } + + retval->setIndexView(std::move(indexView)); + } + + constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM; + + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; + + snorm_normal_t* normals; + + using uv_element_t = uint16_t; + constexpr auto UnityUV = std::numeric_limits::max(); + hlsl::vector* uvs; + { + { + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + auto positionView = createPositionView(vertexCount, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); + } + { + shapes::AABB<4, int8_t> aabb; + aabb.maxVx = hlsl::vector(127,127,127,0); + aabb.minVx = -aabb.maxVx; + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); + } + { + auto uvView = createUvView(vertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); + } + } + + const float tesselationRec = 1.f / static_cast(tesselation); + const float step = 2.f * numbers::pi * tesselationRec; + for (uint32_t i = 0u; i < tesselation; ++i) + { + const auto f_i = static_cast(i); + hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f); + const auto n = quantNormalCache->quantize(p); + p *= radius; + + positions[i] = { p.x, p.y, p.z }; + memcpy(normals + i, &n, sizeof(n)); + encodeUv(uvs + i, float32_t2(f_i * tesselationRec, 0.f)); + + positions[i + halfIx] = { p.x, p.y, length }; + normals[i + halfIx] = normals[i]; + uvs[i + halfIx] = { 1.f * tesselationRec, UnityUV }; + } + + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -/* A cone with proper normals and texture coords */ 
core::smart_refctd_ptr CGeometryCreator::createCone( - float radius, float length, uint32_t tesselation, - const video::SColor& colorTop, - const video::SColor& colorBottom, - float oblique, - IMeshManipulator* const meshManipulatorOverride -) const + float radius, float length, uint16_t tesselation, + float oblique, CQuantNormalCache* const quantNormalCacheOverride) const { - const size_t vtxCnt = tesselation * 2; - auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt * sizeof(ConeVertex) }); - ConeVertex* vertices = reinterpret_cast(vtxBuf->getPointer()); - - ConeVertex* baseVertices = vertices; - ConeVertex* apexVertices = vertices + tesselation; - std::fill(vertices,vertices+vtxCnt, ConeVertex(core::vectorSIMDf(0.f),{},colorBottom)); - CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache(); + using namespace hlsl; - const float step = (2.f*core::PI()) / tesselation; + const uint32_t u32_vertexCount = tesselation + 1; + if (u32_vertexCount > std::numeric_limits::max()) + return nullptr; + const auto vertexCount = static_cast(u32_vertexCount); - const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f); + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); - //vertex positions - for (uint32_t i = 0u; i < tesselation; i++) + // Create indices + using index_t = uint16_t; { - core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f); - v *= radius; + const auto IndexCount = 3 * tesselation; + + auto indexView = createIndexView(IndexCount, vertexCount - 1); + auto u = reinterpret_cast(indexView.src.buffer->getPointer()); + + const uint32_t apexVertexIndex = tesselation; + + for (uint32_t i = 0; i < tesselation; i++) + { + u[i * 3] = apexVertexIndex; + u[(i * 3) + 1] = i; + u[(i * 3) + 2] = i == (tesselation - 1) ? 
0 : i + 1; + } - memcpy(baseVertices[i].pos, v.pointer, sizeof(float) * 3); - memcpy(apexVertices[i].pos, apexVertexCoords.pointer, sizeof(float) * 3); + retval->setIndexView(std::move(indexView)); } - //vertex normals - for (uint32_t i = 0; i < tesselation; i++) + // Create vertex attributes with NONE usage because we have no clue how they'll be used + hlsl::float32_t3* positions; { - const core::vectorSIMDf v0ToApex = apexVertexCoords - core::vectorSIMDf(vertices[i].pos[0], vertices[i].pos[1], vertices[i].pos[2]); - - uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1; - core::vectorSIMDf u1 = core::vectorSIMDf(baseVertices[nextVertexIndex].pos[0], baseVertices[nextVertexIndex].pos[1], baseVertices[nextVertexIndex].pos[2]); - u1 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]); - float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x); - u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight; - - uint32_t prevVertexIndex = i == 0 ? 
(tesselation - 1) : i - 1; - core::vectorSIMDf u2 = core::vectorSIMDf(baseVertices[prevVertexIndex].pos[0], baseVertices[prevVertexIndex].pos[1], baseVertices[prevVertexIndex].pos[2]); - u2 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]); - angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x); - u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight; - - baseVertices[i].normal = quantNormalCache->quantize(core::normalize(u1 + u2)); - apexVertices[i].normal = quantNormalCache->quantize(core::normalize(u1)); + { + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, length, 0.0f); + aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f); + auto positionView = createPositionView(vertexCount, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); + } } - auto idxBuf = asset::ICPUBuffer::create({ 3u * tesselation * sizeof(uint16_t) }); - uint16_t* indices = (uint16_t*)idxBuf->getPointer(); + const float step = (2.f*core::PI()) / tesselation; + + const hlsl::float32_t3 apexVertexCoords(oblique, length, 0.0f); - const uint32_t firstIndexOfBaseVertices = 0; - const uint32_t firstIndexOfApexVertices = tesselation; - for (uint32_t i = 0; i < tesselation; i++) + const auto apexVertexBase_i = tesselation; + + for (uint32_t i = 0u; i < tesselation; i++) { - indices[i * 3] = firstIndexOfApexVertices + i; - indices[(i * 3) + 1] = firstIndexOfBaseVertices + i; - indices[(i * 3) + 2] = i == (tesselation - 1) ? 
firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1; + hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step)); + v *= radius; + positions[i] = v; } + positions[apexVertexBase_i] = apexVertexCoords; - return_type cone; - - constexpr size_t vertexSize = sizeof(ConeVertex); - cone.inputParams = - { 0b111u,0b1u, - { - {0u,EF_R32G32B32_SFLOAT,offsetof(ConeVertex,pos)}, - {0u,EF_R8G8B8A8_UNORM,offsetof(ConeVertex,color)}, - {0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ConeVertex,normal)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; +} - vtxBuf->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - cone.bindings[0] = { 0, std::move(vtxBuf) }; - idxBuf->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT); - cone.indexBuffer = { 0, std::move(idxBuf) }; - cone.indexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t); - cone.indexType = EIT_16BIT; +core::smart_refctd_ptr CGeometryCreator::createArrow( + const uint16_t tesselationCylinder, + const uint16_t tesselationCone, + const float height, + const float cylinderHeight, + const float width0, + const float width1 +) const +{ + assert(height > cylinderHeight); + + auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder); + auto cone = createCone(width1, height-cylinderHeight, tesselationCone); + + auto collection = core::make_smart_refctd_ptr(); + auto* geometries = collection->getGeometries(); + geometries->push_back({ + .geometry = cylinder + }); + const auto coneTransform = hlsl::math::linalg::rotation_mat(hlsl::numbers::pi * -0.5f, hlsl::float32_t3(1.f, 0.f, 0.f)); + geometries->push_back({ + .transform = hlsl::float32_t3x4(coneTransform), + .geometry = cone + }); + return collection; - return cone; } -#endif core::smart_refctd_ptr CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const { @@ -673,94 +706,56 @@ core::smart_refctd_ptr 
CGeometryCreator::createRectangle(co 3---2 */ const index_t indices[] = {0,3,1,1,3,2}; - auto buffer = ICPUBuffer::create({ - {sizeof(indices),IBuffer::EUF_INDEX_BUFFER_BIT}, - const_cast((const void*)indices) // TODO: temporary till two different creation params (adopting needs non const void, copying needs const void only - }); - shapes::AABB<4,index_t> aabb; - aabb.minVx[0] = 0; - aabb.maxVx[0] = 3; - retval->setIndexView({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = sizeof(index_t), - .format = EF_R16_UINT, - .rangeFormat = IGeometryBase::EAABBFormat::U16 - }, - .src = {.offset=0,.size=buffer->getSize(),.buffer=std::move(buffer)} - }); + auto indexView = createIndexView(std::size(indices), 3); + memcpy(indexView.src.buffer->getPointer(), indices, sizeof(indices)); + retval->setIndexView(std::move(indexView)); } + constexpr auto VertexCount = 4; // Create vertices { { - const hlsl::float32_t2 positions[] = { + const hlsl::float32_t2 positions[VertexCount] = { hlsl::float32_t2(-size.x, size.y), hlsl::float32_t2( size.x, size.y), hlsl::float32_t2( size.x,-size.y), hlsl::float32_t2(-size.x,-size.y) }; - auto buff = ICPUBuffer::create({{sizeof(positions),IBuffer::EUF_NONE},(void*)positions}); shapes::AABB<4,float32_t> aabb; aabb.minVx = float32_t4(-size,0.f,0.f); aabb.maxVx = float32_t4( size,0.f,0.f); - retval->visitAABB([aabb](auto& ref)->void - { - ref.minVx = hlsl::trunc(aabb.minVx); - ref.maxVx = hlsl::trunc(aabb.maxVx); - } - ); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32=aabb}, - .stride = sizeof(positions[0]), - .format = EF_R32G32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)} - }); + auto positionView = createPositionView<2>(VertexCount, aabb); + memcpy(positionView.src.buffer->getPointer(), positions, sizeof(positions)); + retval->setPositionView(std::move(positionView)); } { - const hlsl::vector normals[] = { - 
hlsl::vector(0,0,127,0), - hlsl::vector(0,0,127,0), - hlsl::vector(0,0,127,0), - hlsl::vector(0,0,127,0) + const hlsl::vector normals[VertexCount] = { + snorm_positive_z, + snorm_positive_z, + snorm_positive_z, + snorm_positive_z, }; - auto buff = ICPUBuffer::create({{sizeof(normals),IBuffer::EUF_NONE},(void*)normals}); shapes::AABB<4,int8_t> aabb; - aabb.maxVx = hlsl::vector(0,0,127,0); - aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = sizeof(normals[0]), - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + aabb.maxVx = snorm_positive_z; + aabb.minVx = snorm_normal_t(0, 0, 0, 0); + auto normalView = createSnormNormalView(VertexCount, aabb); + memcpy(normalView.src.buffer->getPointer(), normals, sizeof(normals)); + retval->setNormalView(std::move(normalView)); } { - const hlsl::vector uvs[] = { - hlsl::vector( 0,255), - hlsl::vector(255,255), - hlsl::vector(255, 0), - hlsl::vector( 0, 0) + using uv_element_t = uint8_t; + constexpr auto MaxUvVal = std::numeric_limits::max(); + const hlsl::vector uvsData[VertexCount] = { + hlsl::vector( 0, MaxUvVal), + hlsl::vector(MaxUvVal, MaxUvVal), + hlsl::vector(MaxUvVal, 0), + hlsl::vector( 0, 0) }; - auto buff = ICPUBuffer::create({{sizeof(uvs),IBuffer::EUF_NONE},(void*)uvs}); - shapes::AABB<4,uint8_t> aabb; - aabb.minVx = hlsl::vector(0,0,0,0); - aabb.maxVx = hlsl::vector(255,255,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u8=aabb}, - .stride = sizeof(uvs[0]), - .format = EF_R8G8_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U8_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + hlsl::vector* uvs; + auto uvView = createUvView(VertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + memcpy(uvs, uvsData, sizeof(uvsData)); + 
retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -783,68 +778,36 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f const size_t vertexCount = 2u + tesselation; float32_t2* positions; + // for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats - hlsl::vector* normals; + snorm_normal_t* normals; // - constexpr uint16_t UnityUV = 0xffffu; - uint16_t2* uvs; + using uv_element_t = uint16_t; + constexpr uint16_t UnityUV = std::numeric_limits::max(); + hlsl::vector* uvs; { { - constexpr auto AttrSize = sizeof(decltype(*positions)); - auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE}); - positions = reinterpret_cast(buff->getPointer()); shapes::AABB<4,float32_t> aabb; - aabb.maxVx = float32_t4(radius,radius,0.f,0.f); + aabb.maxVx = float32_t4(radius,radius, 0.f, 0.f); aabb.minVx = -aabb.maxVx; - retval->visitAABB([aabb](auto& ref)->void - { - ref.minVx = hlsl::trunc(aabb.minVx); - ref.maxVx = hlsl::trunc(aabb.maxVx); - } - ); - retval->setPositionView({ - .composed = { - .encodedDataRange = {.f32=aabb}, - .stride = AttrSize, - .format = EF_R32G32_SFLOAT, - .rangeFormat = IGeometryBase::EAABBFormat::F32 - }, - .src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)} - }); + auto positionView = createPositionView<2>(vertexCount, aabb); + positions = reinterpret_cast(positionView.src.buffer->getPointer()); + retval->setPositionView(std::move(positionView)); } { constexpr auto AttrSize = sizeof(decltype(*normals)); auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE}); - normals = reinterpret_cast(buff->getPointer()); shapes::AABB<4,int8_t> aabb; - aabb.maxVx = hlsl::vector(0,0,127,0); + aabb.maxVx = snorm_positive_z; aabb.minVx = -aabb.maxVx; - retval->setNormalView({ - .composed = { - .encodedDataRange = {.s8=aabb}, - .stride = AttrSize, - .format = EF_R8G8B8A8_SNORM, - .rangeFormat = IGeometryBase::EAABBFormat::S8_NORM - }, - .src = 
{.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto normalView = createSnormNormalView(vertexCount, aabb); + normals = reinterpret_cast(normalView.src.buffer->getPointer()); + retval->setNormalView(std::move(normalView)); } { - constexpr auto AttrSize = sizeof(decltype(*uvs)); - auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE}); - uvs = reinterpret_cast(buff->getPointer()); - shapes::AABB<4,uint16_t> aabb; - aabb.minVx = uint16_t4(0,0,0,0); - aabb.maxVx = uint16_t4(UnityUV,UnityUV,0,0); - retval->getAuxAttributeViews()->push_back({ - .composed = { - .encodedDataRange = {.u16=aabb}, - .stride = AttrSize, - .format = EF_R16G16_UNORM, - .rangeFormat = IGeometryBase::EAABBFormat::U16_NORM - }, - .src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)} - }); + auto uvView = createUvView(vertexCount); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); + retval->getAuxAttributeViews()->push_back(std::move(uvView)); } } @@ -865,26 +828,27 @@ core::smart_refctd_ptr CGeometryCreator::createDisk(const f *(uvs++) = uint16_t2(t*UnityUV+0.5f,0); } } - std::fill_n(normals,vertexCount,hlsl::vector(0,0,127,0)); + std::fill_n(normals,vertexCount, snorm_positive_z); CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); return retval; } -#if 0 /* Helpful Icosphere class implementation used to compute and create icopshere's vertices and indecies. Polyhedron subdividing icosahedron (20 tris) by N-times iteration - The icosphere with N=1 (default) has 80 triangles by subdividing a triangle - of icosahedron into 4 triangles. If N=0, it is identical to icosahedron. + The icosphere with N=1 (default) has 80 triangles by subdividing a triangle + of icosahedron into 4 triangles. If N=0, it is identical to icosahedron. 
*/ class Icosphere { public: - Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32) + using index_t = uint32_t; + + Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth) { if (smooth) buildVerticesSmooth(); @@ -895,30 +859,22 @@ class Icosphere ~Icosphere() {} unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; } - unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; } - unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; } unsigned int getIndexCount() const { return (unsigned int)indices.size(); } unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); } unsigned int getTriangleCount() const { return getIndexCount() / 3; } - unsigned int getVertexSize() const { return (unsigned int)vertices.size() * sizeof(float); } // # of bytes + unsigned int getPositionSize() const { return (unsigned int)vertices.size() * sizeof(float); } // # of bytes unsigned int getNormalSize() const { return (unsigned int)normals.size() * sizeof(float); } unsigned int getTexCoordSize() const { return (unsigned int)texCoords.size() * sizeof(float); } - unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(unsigned int); } + unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(index_t); } unsigned int getLineIndexSize() const { return (unsigned int)lineIndices.size() * sizeof(unsigned int); } - const float* getVertices() const { return vertices.data(); } + const float* getPositions() const { return vertices.data(); } const float* getNormals() const { return normals.data(); } const float* getTexCoords() const { return texCoords.data(); } const unsigned int* getIndices() const { return indices.data(); } const unsigned int* getLineIndices() const { return 
lineIndices.data(); } - // for interleaved vertices: V/N/T - unsigned int getInterleavedVertexCount() const { return getVertexCount(); } // # of vertices - unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); } // # of bytes - int getInterleavedStride() const { return interleavedStride; } // should be 32 bytes - const float* getInterleavedVertices() const { return interleavedVertices.data(); } - protected: private: @@ -1015,14 +971,14 @@ class Icosphere texture coordinate is shared or no. If it is on the line segments, it is also non-shared point - 00 01 02 03 04 - /\ /\ /\ /\ /\ - / \/ \/ \/ \/ \ + 00 01 02 03 04 + /\ /\ /\ /\ /\ + / \/ \/ \/ \/ \ 05 06 07 08 09 \ - \ 10 11 12 13 14 + \ 10 11 12 13 14 \ /\ /\ /\ /\ / \/ \/ \/ \/ \/ - 15 16 17 18 19 + 15 16 17 18 19 */ static inline bool isSharedTexCoord(const float t[2]) @@ -1096,11 +1052,6 @@ class Icosphere vertices[i] *= scale; vertices[i + 1] *= scale; vertices[i + 2] *= scale; - - // for interleaved array - interleavedVertices[j] *= scale; - interleavedVertices[j + 1] *= scale; - interleavedVertices[j + 2] *= scale; } } @@ -1264,9 +1215,6 @@ class Icosphere // subdivide icosahedron subdivideVerticesFlat(); - - // generate interleaved vertex array as well - buildInterleavedVertices(); } /* @@ -1489,8 +1437,6 @@ class Icosphere // subdivide icosahedron subdivideVerticesSmooth(); - // generate interleaved vertex array as well - buildInterleavedVertices(); } /* divide a trinage into 4 sub triangles and repeat N times @@ -1588,7 +1534,7 @@ class Icosphere v1 / \ newV1 *---* newV3 - / \ / \ + / \ / \ v2---*---v3 newV2 */ @@ -1666,27 +1612,6 @@ class Icosphere stride must be 32 bytes */ - void buildInterleavedVertices() - { - core::vector().swap(interleavedVertices); - - std::size_t i, j; - std::size_t count = vertices.size(); - for (i = 0, j = 0; i < count; i += 3, j += 2) - { - interleavedVertices.push_back(vertices[i]); - 
interleavedVertices.push_back(vertices[i + 1]); - interleavedVertices.push_back(vertices[i + 2]); - - interleavedVertices.push_back(normals[i]); - interleavedVertices.push_back(normals[i + 1]); - interleavedVertices.push_back(normals[i + 2]); - - interleavedVertices.push_back(texCoords[j]); - interleavedVertices.push_back(texCoords[j + 1]); - } - } - void addVertex(float x, float y, float z) { vertices.push_back(x); @@ -1754,8 +1679,8 @@ class Icosphere add 7 sub edge lines per triangle to array using 6 indices (CCW) i1 / : (i1, i2) - i2---i6 : (i2, i6) - / \ / : (i2, i3), (i2, i4), (i6, i4) + i2---i6 : (i2, i6) + / \ / : (i2, i3), (i2, i4), (i6, i4) i3---i4---i5 : (i3, i4), (i4, i5) */ @@ -1830,46 +1755,73 @@ class Icosphere core::vector lineIndices; std::map, uint32_t> sharedIndices; // indices of shared vertices, key is tex coord (s,t) - // interleaved - core::vector interleavedVertices; - uint32_t interleavedStride; // # of bytes to hop to the next vertex (should be 32 bytes) }; core::smart_refctd_ptr CGeometryCreator::createIcoSphere(float radius, uint32_t subdivision, bool smooth) const { - Icosphere IcosphereData(radius, subdivision, smooth); - - return_type icosphereGeometry; - constexpr size_t vertexSize = sizeof(IcosphereVertex); + Icosphere icosphere(radius, subdivision, smooth); - icosphereGeometry.inputParams = - { 0b111u,0b1u, - { - {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,pos)}, - {0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,normals)}, - {0u, EF_R32G32_SFLOAT, offsetof(IcosphereVertex,uv)} - }, - {vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} - }; + auto retval = core::make_smart_refctd_ptr(); + retval->setIndexing(IPolygonGeometryBase::TriangleList()); + + using namespace hlsl; + + // Create indices + { + auto indexView = createIndexView(icosphere.getIndexCount(), icosphere.getVertexCount() - 1); + memcpy(indexView.src.buffer->getPointer(), icosphere.getIndices(), icosphere.getIndexSize()); + 
retval->setIndexView(std::move(indexView)); + } - auto vertexBuffer = asset::ICPUBuffer::create({ IcosphereData.getInterleavedVertexSize() }); - auto indexBuffer = asset::ICPUBuffer::create({ IcosphereData.getIndexSize() }); + { + { + shapes::AABB<4, float32_t> aabb; + aabb.maxVx = float32_t4(radius, radius, radius, 0.f); + aabb.minVx = -aabb.maxVx; + auto positionView = createPositionView(icosphere.getVertexCount(), aabb); + memcpy(positionView.src.buffer->getPointer(), icosphere.getPositions(), icosphere.getPositionSize()); + retval->setPositionView(std::move(positionView)); + } + { + using normal_t = float32_t3; + constexpr auto AttrSize = sizeof(normal_t); + auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE}); + const auto normals = reinterpret_cast(buff->getPointer()); + memcpy(normals, icosphere.getNormals(), icosphere.getNormalSize()); + shapes::AABB<4,float32_t> aabb; + aabb.maxVx = float32_t4(1, 1, 1, 0.f); + aabb.minVx = -aabb.maxVx; + retval->setNormalView({ + .composed = { + .encodedDataRange = {.f32 = aabb}, + .stride = AttrSize, + .format = EF_R32G32B32_SFLOAT, + .rangeFormat = IGeometryBase::EAABBFormat::F32 + }, + .src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)}, + }); + } + { + using uv_element_t = uint16_t; + hlsl::vector* uvs; + auto uvView = createUvView(icosphere.getVertexCount()); + uvs = reinterpret_cast(uvView.src.buffer->getPointer()); - memcpy(vertexBuffer->getPointer(), IcosphereData.getInterleavedVertices(), vertexBuffer->getSize()); - memcpy(indexBuffer->getPointer(), IcosphereData.getIndices(), indexBuffer->getSize()); + for (auto uv_i = 0u; uv_i < icosphere.getVertexCount(); uv_i++) + { + const auto texCoords = icosphere.getTexCoords(); + encodeUv(uvs + uv_i, float32_t2(texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1])); + } - vertexBuffer->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - icosphereGeometry.bindings[0] = { 0, std::move(vertexBuffer) }; - 
indexBuffer->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT); - icosphereGeometry.indexBuffer = { 0, std::move(indexBuffer) }; - icosphereGeometry.indexCount = IcosphereData.getIndexCount(); - icosphereGeometry.indexType = EIT_32BIT; + retval->getAuxAttributeViews()->push_back(std::move(uvView)); + } + } - return icosphereGeometry; + CPolygonGeometryManipulator::recomputeContentHashes(retval.get()); + return retval; } -#endif } // end namespace nbl::asset diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt index 9e4f35fc57..3b9fe1c39a 100644 --- a/src/nbl/builtin/CMakeLists.txt +++ b/src/nbl/builtin/CMakeLists.txt @@ -290,6 +290,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format/shared_exp.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl") #linear algebra LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl") +LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl") # TODO: rename `equations` to `polynomials` probably LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl") LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl") diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp index d004660e42..fed2d68cf0 100644 --- a/src/nbl/video/utilities/CAssetConverter.cpp +++ b/src/nbl/video/utilities/CAssetConverter.cpp @@ -3024,7 +3024,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult uint16_t alignment = hlsl::max(0x1u<(alignof(float),alignment); } uint16_t indexSize = 0; @@ -5061,7 +5061,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul uint16_t alignment = hlsl::max(0x1u<(alignof(float),alignment); } uint16_t indexSize = 0u; @@ -5265,7 +5265,7 @@ ISemaphore::future_t CAssetConverter::convert_impl(SReserveResul } if (geom.hasTransform()) { - offset = core::alignUp(offset,alignof(float)); + offset = core::alignUp(offset, 
IAccelerationStructure::TransformDataMinAlignment); outGeom.transform = {.offset=offset,.buffer=smart_refctd_ptr(scratchBuffer)}; memcpyCallback.data = &geom.transform; if (!streamDataToScratch(offset,sizeof(geom.transform),memcpyCallback))