diff --git a/include/nbl/asset/ECommonEnums.h b/include/nbl/asset/ECommonEnums.h
index c07a0ced6a..f830b270d3 100644
--- a/include/nbl/asset/ECommonEnums.h
+++ b/include/nbl/asset/ECommonEnums.h
@@ -185,292 +185,13 @@ struct SMemoryBarrier
     }
 };
 
-inline core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
-{
-    struct PerStagePreviousStages
-    {
-        public:
-            constexpr PerStagePreviousStages()
-            {
-                // set all stage to have itself as their previous stages
-                for (auto i = 0; i < std::numeric_limits<PIPELINE_STAGE_FLAGS>::digits; i++)
-                  data[i] = static_cast<PIPELINE_STAGE_FLAGS>(i);
-
-                add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
-
-                add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
-
-                // graphics primitive pipeline
-                PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT;
-                for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT})
-                {
-                    if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT)
-                      primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT;
-                    add(pipelineStage, primitivePrevStage);
-                    primitivePrevStage |= pipelineStage;
-                }
-
-
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
-
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
-                data[bitIx] |= previousStageFlags;
-            }
-
-            PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
-    };
-
-    constexpr PerStagePreviousStages bitToAccess = {};
-
-    core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
-    while (bool(stages.value))
-    {
-        const auto bitIx = hlsl::findLSB(stages);
-        retval |= bitToAccess[bitIx];
-        stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
-
-inline core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
-{
-    struct PerStageLaterStages
-    {
-        public:
-            constexpr PerStageLaterStages()
-            {
-                // set all stage to have itself as their next stages
-                for (auto i = 0; i < std::numeric_limits<PIPELINE_STAGE_FLAGS>::digits; i++)
-                  data[i] = static_cast<PIPELINE_STAGE_FLAGS>(i);
-
-                add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT);
-                add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
-
-                // graphics primitive pipeline
-                PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE;
-                const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT };
-                for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++)
-                {
-                    const auto pipelineStage = *iter;
-                    add(pipelineStage, laterStage);
-                    laterStage |= pipelineStage;
-                }
-
-                add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT);
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
-
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
-                data[bitIx] |= laterStageFlags;
-            }
-
-            PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
-    };
-
-    constexpr PerStageLaterStages bitToAccess = {};
-
-    core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
-    while (bool(stages.value))
-    {
-        const auto bitIx = hlsl::findLSB(stages);
-        retval |= bitToAccess[bitIx];
-        stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
-
-inline core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages)
-{
-    struct PerStageAccesses
-    {
-        public:
-            constexpr PerStageAccesses()
-            {
-                init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT);
-
-                constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT;
-                init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW);
-                init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT);
-
-                constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT;
-//                init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly?
-                
-                constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT;
-                init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW);
-                init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW);
-
-                init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT);
-                init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT);
-
-                constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS;
-                constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW);
-//                init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW);
-//                init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW);
-                init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT);
-                constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-                init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW);
-                init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT);
-                init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW);
-                init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT);
-
-                init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
-
-                init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW);
-                init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW);
-
-//                init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT);
-//                init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT);
-//                init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT);
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages);
 
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(stageFlag));
-                data[bitIx] = accessFlags;
-            }
+core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages);
 
-            ACCESS_FLAGS data[32] = {};
-    };
-    constexpr PerStageAccesses bitToAccess = {};
+core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages);
 
-    // TODO: add logically later or previous stages to make sure all other accesses remain valid
-    // or ideally expand the stages before calling `allAccessesFromStages` (TODO: add a `allLaterStages` and `allPreviouStages` basically)
-
-    core::bitflag<ACCESS_FLAGS> retval = ACCESS_FLAGS::NONE;
-    while (bool(stages.value))
-    {
-        const auto bitIx = hlsl::findLSB(stages);
-        retval |= bitToAccess[bitIx];
-        stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
-
-inline core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_FLAGS> accesses)
-{
-    struct PerAccessStages
-    {
-        public:
-            constexpr PerAccessStages()
-            {
-                init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
-                init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
-
-                init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT);
-                init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS);
-
-                constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT;
-//                init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds);
-//                init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
-                
-                constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT;
-                constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;
-                init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders);
-                init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations);
-
-                init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
-                init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
-                init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT);
-                init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
-
-                init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders);
-                init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
-                init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds);
-                init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders);
-
-                init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT);
-                init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT);
-
-                init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT);
-                init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT);
-                constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT;
-                init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests);
-                init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests);
-                init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT);
-                init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
-                init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
-
-                init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
-
-//                init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
-//                init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
-//                init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
-//                init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
-//                init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
-//                init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
-            }
-            constexpr const auto& operator[](const size_t ix) const {return data[ix];}
-
-        private:
-            constexpr static uint8_t findLSB(size_t val)
-            {
-                for (size_t ix=0ull; ix<sizeof(size_t)*8; ix++)
-                if ((0x1ull<<ix)&val)
-                    return ix;
-                return ~0u;
-            }
-            constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags)
-            {
-                const auto bitIx = findLSB(static_cast<size_t>(accessFlags));
-                data[bitIx] = stageFlags;
-            }
-
-            PIPELINE_STAGE_FLAGS data[32] = {};
-    };
-    constexpr PerAccessStages bitToStage = {};
-
-    core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
-    while (bool(accesses.value))
-    {
-        const auto bitIx = hlsl::findLSB(accesses);
-        retval |= bitToStage[bitIx];
-        accesses ^= static_cast<ACCESS_FLAGS>(0x1u<<bitIx);
-    }
-
-    return retval;
-}
+core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_FLAGS> accesses);
 
 }
 
diff --git a/include/nbl/asset/IAccelerationStructure.h b/include/nbl/asset/IAccelerationStructure.h
index 829d10bcd8..755c81e477 100644
--- a/include/nbl/asset/IAccelerationStructure.h
+++ b/include/nbl/asset/IAccelerationStructure.h
@@ -23,6 +23,8 @@ namespace nbl::asset
 class IAccelerationStructure : public virtual core::IReferenceCounted
 {
 	public:
+		static constexpr inline size_t TransformDataMinAlignment = 16;
+
 		// build flags, we don't expose flags that don't make sense for certain levels
 		enum class BUILD_FLAGS : uint8_t
 		{
diff --git a/include/nbl/asset/IBuffer.h b/include/nbl/asset/IBuffer.h
index 8c3b8f95ef..6f8c1bb35b 100644
--- a/include/nbl/asset/IBuffer.h
+++ b/include/nbl/asset/IBuffer.h
@@ -100,7 +100,7 @@ struct SBufferRange
 	inline operator SBufferRange<const BufferType>&() {return *reinterpret_cast<SBufferRange<const BufferType>*>(this);}
 	inline operator const SBufferRange<const BufferType>&() const {return *reinterpret_cast<const SBufferRange<const BufferType>*>(this);}
 
-	template<typename BT> requires std::is_same_v<std::remove_const_t<BT>,BufferType>
+	template<typename BT> requires (std::is_const_v<BT> && std::is_base_of_v<IBuffer,std::remove_const_t<BT>>)
 	inline operator SBufferBinding<BT>() const { return {.offset=offset,.buffer=buffer}; }
 
 	explicit inline operator bool() const {return isValid();}
diff --git a/include/nbl/asset/IPolygonGeometry.h b/include/nbl/asset/IPolygonGeometry.h
index a8ab97aa4d..f21e7f7233 100644
--- a/include/nbl/asset/IPolygonGeometry.h
+++ b/include/nbl/asset/IPolygonGeometry.h
@@ -203,6 +203,26 @@ class IPolygonGeometry : public IIndexableGeometry<BufferType>, public IPolygonG
         // For User defined semantics
         inline const core::vector<SDataView>& getAuxAttributeViews() const {return m_auxAttributeViews;}
 
+        // Maps the index view's format to an index type: EIT_UNKNOWN when there is no index view or its format is neither R16_UINT nor R32_UINT.
+        inline E_INDEX_TYPE getIndexType() const
+        {
+            auto indexType = EIT_UNKNOWN;
+            if (base_t::m_indexView)
+            {
+                switch (base_t::m_indexView.composed.format)
+                {
+                    case EF_R16_UINT:
+                        indexType = EIT_16BIT;
+                        break;
+                    case EF_R32_UINT:
+                        indexType = EIT_32BIT;
+                        break;
+                    default: // any other format leaves the type as EIT_UNKNOWN
+                        break;
+                }
+            }
+            return indexType;
+        }
 
         // Does not set the `transform` or `geometryFlags` fields, because it doesn't care about it.
         // Also won't set second set of vertex data, opacity mipmaps, etc.
@@ -212,30 +232,12 @@ class IPolygonGeometry : public IIndexableGeometry<BufferType>, public IPolygonG
             // must be a triangle list, but don't want to compare pointers
             if (m_indexing && m_indexing->knownTopology()==EPT_TRIANGLE_LIST)// && m_indexing->degree() == TriangleList()->degree() && m_indexing->rate() == TriangleList->rate())
             {
-                auto indexType = EIT_UNKNOWN;
-                // disallowed index format
-                if (base_t::m_indexView)
-                {
-                    switch (base_t::m_indexView.composed.format)
-                    {
-                        case EF_R16_UINT:
-                            indexType = EIT_16BIT;
-                            break;
-                        case EF_R32_UINT: [[fallthrough]];
-                            indexType = EIT_32BIT;
-                            break;
-                        default:
-                            break;
-                    }
-                    if (indexType==EIT_UNKNOWN)
-                        return retval;
-                }
                 retval.vertexData[0] = base_t::m_positionView.src;
                 retval.indexData = base_t::m_indexView.src;
                 retval.maxVertex = base_t::m_positionView.getElementCount() - 1;
                 retval.vertexStride = base_t::m_positionView.composed.getStride();
                 retval.vertexFormat = base_t::m_positionView.composed.format;
-                retval.indexType = indexType;
+                retval.indexType = getIndexType();
             }
             return retval;
         }
diff --git a/include/nbl/asset/utils/CDirQuantCacheBase.h b/include/nbl/asset/utils/CDirQuantCacheBase.h
index c4e7174013..c02499b849 100644
--- a/include/nbl/asset/utils/CDirQuantCacheBase.h
+++ b/include/nbl/asset/utils/CDirQuantCacheBase.h
@@ -43,13 +43,13 @@ class CDirQuantCacheBase
 				
 				Vector8u3() : x(0u),y(0u),z(0u) {}
 				Vector8u3(const Vector8u3&) = default;
-				explicit Vector8u3(const core::vectorSIMDu32& val)
+				explicit Vector8u3(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u3& operator=(const Vector8u3&) = default;
-				Vector8u3& operator=(const core::vectorSIMDu32& val)
+				Vector8u3& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -57,11 +57,12 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z);
+					return { x, y, z, 0 };
 				}
 
+
 			private:
 				uint8_t x;
 				uint8_t y;
@@ -74,13 +75,13 @@ class CDirQuantCacheBase
 				
 				Vector8u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector8u4(const Vector8u4&) = default;
-				explicit Vector8u4(const core::vectorSIMDu32& val)
+				explicit Vector8u4(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector8u4& operator=(const Vector8u4&) = default;
-				Vector8u4& operator=(const core::vectorSIMDu32& val)
+				Vector8u4& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -89,9 +90,9 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z,w);
+					return { x, y, z, w };
 				}
 				
 			private:
@@ -108,16 +109,16 @@ class CDirQuantCacheBase
 
 				Vector1010102() : storage(0u) {}
 				Vector1010102(const Vector1010102&) = default;
-				explicit Vector1010102(const core::vectorSIMDu32& val)
+				explicit Vector1010102(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector1010102& operator=(const Vector1010102&) = default;
-				Vector1010102& operator=(const core::vectorSIMDu32& val)
+				Vector1010102& operator=(const hlsl::uint32_t4& val)
 				{
-					constexpr auto storageBits = quantizationBits+1u;
-					storage = val.x|(val.y<<storageBits)|(val.z<<(storageBits*2u));
+					constexpr auto storageBits = quantizationBits + 1u;
+					storage = val.x | (val.y << storageBits) | (val.z << (storageBits * 2u));
 					return *this;
 				}
 
@@ -130,13 +131,13 @@ class CDirQuantCacheBase
 					return storage==other.storage;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
-					constexpr auto storageBits = quantizationBits+1u;
-					const core::vectorSIMDu32 mask((0x1u<<storageBits)-1u);
-					return core::vectorSIMDu32(storage,storage>>storageBits,storage>>(storageBits*2u))&mask;
+					constexpr auto storageBits = quantizationBits + 1u;
+					const auto mask = (0x1u << storageBits) - 1u;
+					return { storage & mask, (storage >> storageBits) & mask, (storage >> (storageBits * 2)) & mask, 0};
 				}
-				
+
 			private:
 				uint32_t storage;
 		};
@@ -149,13 +150,13 @@ class CDirQuantCacheBase
 				
 				Vector16u3() : x(0u),y(0u),z(0u) {}
 				Vector16u3(const Vector16u3&) = default;
-				explicit Vector16u3(const core::vectorSIMDu32& val)
+				explicit Vector16u3(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u3& operator=(const Vector16u3&) = default;
-				Vector16u3& operator=(const core::vectorSIMDu32& val)
+				Vector16u3& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -163,11 +164,11 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+				hlsl::uint32_t4 getValue() const
 				{
-					return core::vectorSIMDu32(x,y,z);
+					return { x, y, z, 0 };
 				}
-				
+
 			private:
 				uint16_t x;
 				uint16_t y;
@@ -180,13 +181,13 @@ class CDirQuantCacheBase
 
 				Vector16u4() : x(0u),y(0u),z(0u),w(0u) {}
 				Vector16u4(const Vector16u4&) = default;
-				explicit Vector16u4(const core::vectorSIMDu32& val)
+				explicit Vector16u4(const hlsl::uint32_t4& val)
 				{
 					operator=(val);
 				}
 
 				Vector16u4& operator=(const Vector16u4&) = default;
-				Vector16u4& operator=(const core::vectorSIMDu32& val)
+				Vector16u4& operator=(const hlsl::uint32_t4& val)
 				{
 					x = val.x;
 					y = val.y;
@@ -195,11 +196,11 @@ class CDirQuantCacheBase
 					return *this;
 				}
 
-				inline core::vectorSIMDu32 getValue() const
+				hlsl::uint32_t4 getValue() const // unsigned like the stored components and like the type `operator=` accepts
 				{
-					return core::vectorSIMDu32(x,y,z,w);
+					return { x, y, z, w };
 				}
-				
+
 			private:
 				uint16_t x;
 				uint16_t y;
@@ -377,11 +378,30 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 		std::tuple<cache_type_t<Formats>...> cache;
 		
 		template<uint32_t dimensions, E_FORMAT CacheFormat>
-		value_type_t<CacheFormat> quantize(const core::vectorSIMDf& value)
+		value_type_t<CacheFormat> quantize(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
-			const auto negativeMask = value < core::vectorSIMDf(0.0f);
+			using float32_tN = hlsl::vector<hlsl::float32_t, dimensions>;
+
+			auto to_vec_t4 = []<typename T>(hlsl::vector<T, dimensions> src, T padValue) -> hlsl::vector<T, 4>
+			{
+				if constexpr(dimensions == 1)
+				{
+					return {src.x, padValue, padValue, padValue};
+				} else if constexpr (dimensions == 2)
+				{
+					return {src.x, src.y, padValue, padValue};
+				} else if constexpr (dimensions == 3)
+				{
+					return {src.x, src.y, src.z, padValue};
+				} else if constexpr (dimensions == 4)
+				{
+					return {src.x, src.y, src.z, src.w};
+				}
+			};
+
+			const auto negativeMask = to_vec_t4(lessThan(value, float32_tN(0.0f)), false);
 
-			const core::vectorSIMDf absValue = abs(value);
+			const float32_tN absValue = abs(value);
 			const auto key = Key(absValue);
 
 			constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -393,32 +413,50 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 					quantized = found->second;
 				else
 				{
-					const core::vectorSIMDf fit = findBestFit<dimensions,quantizationBits>(absValue);
+					const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
+
+					const auto abs_fit = to_vec_t4(abs(fit), 0.f);
+					quantized = hlsl::uint32_t4(abs_fit.x, abs_fit.y, abs_fit.z, abs_fit.w);
 
-					quantized = core::vectorSIMDu32(core::abs(fit));
 					insertIntoCache<CacheFormat>(key,quantized);
 				}
 			}
 
-			const core::vectorSIMDu32 xorflag((0x1u<<(quantizationBits+1u))-1u);
-			auto restoredAsVec = quantized.getValue()^core::mix(core::vectorSIMDu32(0u),xorflag,negativeMask);
-			restoredAsVec += core::mix(core::vectorSIMDu32(0u),core::vectorSIMDu32(1u),negativeMask);
-			return value_type_t<CacheFormat>(restoredAsVec&xorflag);
+			// Per-component pick: takes `val2` where `mask` is set and `val1` everywhere else.
+			auto select = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
+			{
+				hlsl::uint32_t4 retval;
+				retval.x = mask.x ? val2.x : val1.x;
+				retval.y = mask.y ? val2.y : val1.y;
+				retval.z = mask.z ? val2.z : val1.z;
+				retval.w = mask.w ? val2.w : val1.w;
+				return retval;
+			};
+			// every component has its low `quantizationBits+1` bits set
+			const hlsl::uint32_t4 xorflag((0x1u << (quantizationBits + 1u)) - 1u);
+
+			// Negate the components flagged in `negativeMask`: XOR with all ones flips their bits and the increment below adds one.
+			// Unflagged components are XORed with 0 and incremented by 0, so they keep the cached value.
+			auto restoredAsVec = quantized.getValue() ^ select(hlsl::uint32_t4(0u), xorflag, negativeMask);
+			restoredAsVec += hlsl::uint32_t4(negativeMask);
+			// negating 0 carries out of the low `quantizationBits+1` bits, the mask keeps that carry out of packed neighbours
+			return value_type_t<CacheFormat>(restoredAsVec & xorflag);
 		}
 
 		template<uint32_t dimensions, uint32_t quantizationBits>
-		static inline core::vectorSIMDf findBestFit(const core::vectorSIMDf& value)
+		static inline hlsl::vector<hlsl::float32_t, dimensions> findBestFit(const hlsl::vector<hlsl::float32_t, dimensions>& value)
 		{
+			using float32_tN = hlsl::vector<hlsl::float32_t, dimensions>;
 			static_assert(dimensions>1u,"No point");
 			static_assert(dimensions<=4u,"High Dimensions are Hard!");
-			// precise normalize
-			const auto vectorForDots = value.preciseDivision(length(value));
+
+			const auto vectorForDots = hlsl::normalize(value);
 
 			//
-			core::vectorSIMDf fittingVector;
-			core::vectorSIMDf floorOffset;
+			float32_tN fittingVector;
+			float32_tN floorOffset = {};
 			constexpr uint32_t cornerCount = (0x1u<<(dimensions-1u))-1u;
-			core::vectorSIMDf corners[cornerCount] = {};
+			float32_tN corners[cornerCount] = {};
 			{
 				uint32_t maxDirCompIndex = 0u;
 				for (auto i=1u; i<dimensions; i++)
@@ -430,9 +468,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				if (maxDirectionComp < std::sqrtf(0.9998f / float(dimensions)))
 				{
 					_NBL_DEBUG_BREAK_IF(true);
-					return core::vectorSIMDf(0.f);
+					return float32_tN(0.f);
 				}
-				fittingVector = value.preciseDivision(core::vectorSIMDf(maxDirectionComp));
+				fittingVector = value / maxDirectionComp;
 				floorOffset[maxDirCompIndex] = 0.499f;
 				const uint32_t localCorner[7][3] = {
 					{1,0,0},
@@ -452,12 +490,12 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 				}
 			}
 
-			core::vectorSIMDf bestFit;
+			float32_tN bestFit;
 			float closestTo1 = -1.f;
-			auto evaluateFit = [&](const core::vectorSIMDf& newFit) -> void
+			auto evaluateFit = [&](const float32_tN& newFit) -> void
 			{
-				auto newFitLen = core::length(newFit);
-				const float dp = core::dot<core::vectorSIMDf>(newFit,vectorForDots).preciseDivision(newFitLen)[0];
+				auto newFitLen = length(newFit);
+				const float dp = hlsl::dot(newFit,vectorForDots) / (newFitLen);
 				if (dp > closestTo1)
 				{
 					closestTo1 = dp;
@@ -466,18 +504,18 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
 			};
 
 			constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u;
-			const core::vectorSIMDf cubeHalfSizeND = core::vectorSIMDf(cubeHalfSize);
+			const float32_tN cubeHalfSizeND = hlsl::promote<float32_tN>(cubeHalfSize);
 			for (uint32_t n=cubeHalfSize; n>0u; n--)
 			{
 				//we'd use float addition in the interest of speed, to increment the loop
 				//but adding a small number to a large one loses precision, so multiplication preferrable
-				core::vectorSIMDf bottomFit = core::floor(fittingVector*float(n)+floorOffset);
-				if ((bottomFit<=cubeHalfSizeND).all())
+				const auto bottomFit = glm::floor(fittingVector * float(n) + floorOffset);
+				if (hlsl::all(glm::lessThanEqual(bottomFit, cubeHalfSizeND)))
 					evaluateFit(bottomFit);
-				for (auto i=0u; i<cornerCount; i++)
+				for (auto i = 0u; i < cornerCount; i++)
 				{
 					auto bottomFitTmp = bottomFit+corners[i];
-					if ((bottomFitTmp<=cubeHalfSizeND).all())
+					if (hlsl::all(glm::lessThanEqual(bottomFitTmp, cubeHalfSizeND)))
 						evaluateFit(bottomFitTmp);
 				}
 			}
diff --git a/include/nbl/asset/utils/CGeometryCreator.h b/include/nbl/asset/utils/CGeometryCreator.h
index 87d7a0ef5e..bd5281cde3 100644
--- a/include/nbl/asset/utils/CGeometryCreator.h
+++ b/include/nbl/asset/utils/CGeometryCreator.h
@@ -11,6 +11,8 @@
 // legacy, needs to be removed
 #include "SColor.h"
 
+#include "nbl/asset/ICPUGeometryCollection.h"
+
 
 namespace nbl::asset
 {
@@ -58,11 +60,10 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param colorCone color of the cone
 		\return Generated mesh.
 		*/
-		core::smart_refctd_ptr<ICPUPolygonGeometry> createArrow(const uint32_t tesselationCylinder = 4,
-				const uint32_t tesselationCone = 8, const float height = 1.f,
+		core::smart_refctd_ptr<ICPUGeometryCollection> createArrow(const uint16_t tesselationCylinder = 4,
+				const uint16_t tesselationCone = 8, const float height = 1.f,
 				const float cylinderHeight = 0.6f, const float widthCylinder = 0.05f,
-				const float widthCone = 0.3f, const video::SColor colorCylinder = 0xFFFFFFFF,
-				const video::SColor colorCone = 0xFFFFFFFF) const;
+				const float widthCone = 0.3f) const;
 
 
 		//! Create a sphere mesh.
@@ -86,8 +87,8 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\return Generated mesh.
 		*/
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createCylinder(float radius, float length,
-				uint32_t tesselation,
-				const video::SColor& color=video::SColor(0xffffffff), CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
+				uint16_t tesselation,
+				CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
 
 		//! Create a cone mesh.
 		/**
@@ -99,9 +100,7 @@ class NBL_API2 CGeometryCreator final : public core::IReferenceCounted
 		\param oblique (to be documented)
 		\return Generated mesh.
 		*/
-		core::smart_refctd_ptr<ICPUPolygonGeometry> createCone(float radius, float length, uint32_t tesselation,
-				const video::SColor& colorTop=video::SColor(0xffffffff),
-				const video::SColor& colorBottom=video::SColor(0xffffffff),
+		core::smart_refctd_ptr<ICPUPolygonGeometry> createCone(float radius, float length, uint16_t tesselation,
 				float oblique=0.f, CQuantNormalCache* const quantNormalCacheOverride=nullptr) const;
 
 		core::smart_refctd_ptr<ICPUPolygonGeometry> createRectangle(const hlsl::float32_t2 size={0.5f,0.5f}) const;
diff --git a/include/nbl/asset/utils/CQuantNormalCache.h b/include/nbl/asset/utils/CQuantNormalCache.h
index 92703d9d37..31b7d403d2 100644
--- a/include/nbl/asset/utils/CQuantNormalCache.h
+++ b/include/nbl/asset/utils/CQuantNormalCache.h
@@ -19,7 +19,7 @@ namespace impl
 
 struct VectorUV
 {
-	inline VectorUV(const core::vectorSIMDf& absNormal)
+	inline VectorUV(const hlsl::float32_t3& absNormal)
 	{
 		const float rcpManhattanNorm = 1.f / (absNormal.x + absNormal.y + absNormal.z);
 		u = absNormal.x * rcpManhattanNorm;
@@ -56,9 +56,8 @@ class CQuantNormalCache : public CDirQuantCacheBase<impl::VectorUV,impl::QuantNo
 
 	public:
 		template<E_FORMAT CacheFormat>
-		value_type_t<CacheFormat> quantize(core::vectorSIMDf normal)
+		value_type_t<CacheFormat> quantize(hlsl::float32_t3 normal)
 		{
-			normal.makeSafe3D();
 			return Base::quantize<3u,CacheFormat>(normal);
 		}
 };
diff --git a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
index f92dddfb26..8bfed025ce 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat/impl/intrinsics_impl.hlsl
@@ -348,8 +348,14 @@ requires concepts::IntegralScalar<T>
 struct find_lsb_helper<T>
 {
 	using return_t = int32_t;
-	static inline T __call(const T arg)
+	NBL_CONSTEXPR_FUNC static inline T __call(const T arg) // index of the lowest set bit of `arg`
 	{
+		if (std::is_constant_evaluated()) // must be a plain `if`: as an `if constexpr` condition this is always true and the glm path below would be dead
+		{
+			for (T ix = T(0); ix < sizeof(T) * 8; ix++) // bounded by the width of T so the shift below never exceeds it
+				if ((T(1) << ix) & arg) return ix;
+			return ~T(0); // no bit set: all-ones (-1 for signed T)
+		}
 		return glm::findLSB<T>(arg);
 	}
 };
@@ -369,7 +375,7 @@ requires std::is_enum_v<EnumType>
 struct find_lsb_helper<EnumType>
 {
 	using return_t = int32_t;
-	static int32_t __call(NBL_CONST_REF_ARG(EnumType) val)
+	NBL_CONSTEXPR_FUNC static int32_t __call(NBL_CONST_REF_ARG(EnumType) val)
 	{
 		using underlying_t = std::underlying_type_t<EnumType>;
 		return find_lsb_helper<underlying_t>::__call(static_cast<underlying_t>(val));
diff --git a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl
index a5747a5fb7..7198bae563 100644
--- a/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl
+++ b/include/nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl
@@ -68,7 +68,7 @@ inline typename matrix_traits<Matrix>::scalar_type determinant(NBL_CONST_REF_ARG
 }
 
 template<typename T>
-inline typename cpp_compat_intrinsics_impl::find_lsb_helper<T>::return_t findLSB(NBL_CONST_REF_ARG(T) val)
+NBL_CONSTEXPR_FUNC inline typename cpp_compat_intrinsics_impl::find_lsb_helper<T>::return_t findLSB(NBL_CONST_REF_ARG(T) val)
 {
 	return cpp_compat_intrinsics_impl::find_lsb_helper<T>::__call(val);
 }
diff --git a/include/nbl/builtin/hlsl/math/linalg/transform.hlsl b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl
new file mode 100644
index 0000000000..943bd313f0
--- /dev/null
+++ b/include/nbl/builtin/hlsl/math/linalg/transform.hlsl
@@ -0,0 +1,57 @@
+// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
+// This file is part of the "Nabla Engine".
+// For conditions of distribution and use, see copyright notice in nabla.h
+#ifndef _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORM_INCLUDED_
+#define _NBL_BUILTIN_HLSL_MATH_LINALG_TRANSFORM_INCLUDED_
+
+
+#include <nbl/builtin/hlsl/mpl.hlsl>
+#include <nbl/builtin/hlsl/cpp_compat/intrinsics.hlsl>
+#include <nbl/builtin/hlsl/concepts.hlsl>
+
+
+namespace nbl
+{
+namespace hlsl
+{
+namespace math
+{
+namespace linalg
+{
+
+/// Builds a rotation 4 * 4 matrix created from an axis vector and an angle.
+/// The upper-left 3x3 holds the rotation, the fourth row and column are those of the identity.
+/// @param angle Rotation angle expressed in radians.
+/// @param axis Rotation axis, must be normalized.
+///
+/// @tparam T A floating-point scalar type
+template <typename T>
+matrix<T, 4, 4> rotation_mat(T angle, vector<T, 3> const& axis)
+{
+  T const c = cos(angle);
+  T const s = sin(angle);
+
+  vector<T, 3> temp((T(1) - c) * axis); // (1 - cos) * axis, shared by all nine terms below
+
+  matrix<T, 4, 4> rotation; // every element is written below, nothing is left uninitialized
+  rotation[0][0] = c + temp[0] * axis[0];
+  rotation[0][1] = temp[1] * axis[0] - s * axis[2];
+  rotation[0][2] = temp[2] * axis[0] + s * axis[1];
+  rotation[0][3] = T(0);
+  rotation[1][0] = temp[0] * axis[1] + s * axis[2];
+  rotation[1][1] = c + temp[1] * axis[1];
+  rotation[1][2] = temp[2] * axis[1] - s * axis[0];
+  rotation[1][3] = T(0);
+  rotation[2][0] = temp[0] * axis[2] - s * axis[1];
+  rotation[2][1] = temp[1] * axis[2] + s * axis[0];
+  rotation[2][2] = c + temp[2] * axis[2];
+  rotation[2][3] = T(0);
+  rotation[3] = vector<T, 4>(T(0), T(0), T(0), T(1));
+  return rotation;
+}
+
+}
+}
+}
+}
+#endif
diff --git a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl
index 3a49450d7c..41f56e225e 100644
--- a/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl
+++ b/include/nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl
@@ -46,11 +46,21 @@ bool rayQueryProceedKHR([[vk::ext_reference]] RayQueryKHR query);
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionTypeKHR)]]
 int rayQueryGetIntersectionTypeKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
 
+[[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
+[[vk::ext_extension("SPV_KHR_ray_query")]]
+[[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceCustomIndexKHR)]]
+int rayQueryGetIntersectionInstanceCustomIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
+
 [[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
 [[vk::ext_extension("SPV_KHR_ray_query")]]
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionInstanceIdKHR)]]
 int rayQueryGetIntersectionInstanceIdKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
 
+[[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
+[[vk::ext_extension("SPV_KHR_ray_query")]]
+[[vk::ext_instruction(spv::OpRayQueryGetIntersectionGeometryIndexKHR)]]
+int rayQueryGetIntersectionGeometryIndexKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
+
 [[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
 [[vk::ext_extension("SPV_KHR_ray_query")]]
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionPrimitiveIndexKHR)]]
@@ -61,6 +71,11 @@ int rayQueryGetIntersectionPrimitiveIndexKHR([[vk::ext_reference]] RayQueryKHR q
 [[vk::ext_instruction(spv::OpRayQueryGetIntersectionBarycentricsKHR)]]
 float2 rayQueryGetIntersectionBarycentricsKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed);
 
+[[vk::ext_capability(spv::CapabilityRayQueryKHR)]]
+[[vk::ext_extension("SPV_KHR_ray_query")]]
+[[vk::ext_instruction(spv::OpRayQueryGetIntersectionFrontFaceKHR)]]
+bool rayQueryGetIntersectionFrontFaceKHR([[vk::ext_reference]] RayQueryKHR query, uint32_t committed); // the instruction's result type is a Boolean scalar, not a float2
+
 // position fetch for ray tracing uses gl_HitTriangleVertexPositionsEXT -> HitTriangleVertexPositionsKHR decorated OpVariable
 [[vk::ext_builtin_input(spv::BuiltInHitTriangleVertexPositionsKHR)]]
 static const float32_t3 HitTriangleVertexPositionsKHR[3];
diff --git a/include/nbl/core/util/bitflag.h b/include/nbl/core/util/bitflag.h
index 1731c0cac3..62bec57d49 100644
--- a/include/nbl/core/util/bitflag.h
+++ b/include/nbl/core/util/bitflag.h
@@ -60,7 +60,7 @@ namespace nbl::hlsl::cpp_compat_intrinsics_impl
 	struct find_lsb_helper<core::bitflag<ENUM_TYPE>>
 	{
 		using return_t = int32_t;
-		static return_t __call(NBL_CONST_REF_ARG(core::bitflag<ENUM_TYPE>) val)
+		NBL_CONSTEXPR_FUNC static return_t __call(NBL_CONST_REF_ARG(core::bitflag<ENUM_TYPE>) val)
 		{
 			return find_lsb_helper<ENUM_TYPE>::__call(val.value);
 		}
diff --git a/src/nbl/CMakeLists.txt b/src/nbl/CMakeLists.txt
index e7193e3eaf..935beffe2c 100755
--- a/src/nbl/CMakeLists.txt
+++ b/src/nbl/CMakeLists.txt
@@ -151,6 +151,7 @@ set(NBL_UI_SOURCES
 )
 set(NBL_ASSET_SOURCES	
 # Assets
+	asset/ECommonEnums.cpp
 	asset/IAsset.cpp
 	asset/IRenderpass.cpp
 	asset/IAssetManager.cpp
diff --git a/src/nbl/asset/ECommonEnums.cpp b/src/nbl/asset/ECommonEnums.cpp
new file mode 100644
index 0000000000..2366b25f99
--- /dev/null
+++ b/src/nbl/asset/ECommonEnums.cpp
@@ -0,0 +1,268 @@
+#include "nbl/asset/ECommonEnums.h"
+
+namespace nbl::asset
+{
+
+core::bitflag<PIPELINE_STAGE_FLAGS> allPreviousStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages) // union, over every bit set in `stages`, of that stage and the stages recorded as preceding it
+{
+	struct PerStagePreviousStages // compile-time table: bit index of a stage -> mask of that stage plus its previous stages
+	{
+		public:
+			constexpr PerStagePreviousStages()
+			{
+				// set all stage to have itself as their previous stages
+				for (auto i = 0; i < std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits; i++) // `digits` of the enum type itself is 0 (unspecialized numeric_limits), which would skip this loop
+					data[i] = static_cast<PIPELINE_STAGE_FLAGS>(std::underlying_type_t<PIPELINE_STAGE_FLAGS>(1) << i); // the stage's own bit, not its index
+
+				add(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
+
+				add(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
+
+				// graphics primitive pipeline
+				PIPELINE_STAGE_FLAGS primitivePrevStage = PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT;
+				for (auto pipelineStage : {PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT})
+				{
+					if (pipelineStage == PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT) // from here on the fragment density process stage also counts as previous
+						primitivePrevStage |= PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT;
+					add(pipelineStage, primitivePrevStage);
+					primitivePrevStage |= pipelineStage; // accumulate so each stage sees all earlier ones in the list
+				}
+
+
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+
+			constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS previousStageFlags) // ORs previousStageFlags into the entry of stageFlag's lowest set bit
+			{
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(stageFlag));
+				data[bitIx] |= previousStageFlags;
+			}
+
+			PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
+	};
+
+	constexpr PerStagePreviousStages bitToAccess = {};
+
+	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
+	while (bool(stages.value)) // peel off one set bit per iteration
+	{
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(stages.value));
+		retval |= bitToAccess[bitIx];
+		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
+	}
+
+	return retval;
+}
+
+core::bitflag<PIPELINE_STAGE_FLAGS> allLaterStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages) // union, over every bit set in `stages`, of that stage and the stages recorded as following it
+{
+	struct PerStageLaterStages // compile-time table: bit index of a stage -> mask of that stage plus its later stages
+	{
+		public:
+			constexpr PerStageLaterStages()
+			{
+				// set all stage to have itself as their next stages
+				for (auto i = 0; i < std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits; i++) // `digits` of the enum type itself is 0 (unspecialized numeric_limits), which would skip this loop
+					data[i] = static_cast<PIPELINE_STAGE_FLAGS>(std::underlying_type_t<PIPELINE_STAGE_FLAGS>(1) << i); // the stage's own bit, not its index
+
+				add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT);
+				add(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
+
+				// graphics primitive pipeline
+				PIPELINE_STAGE_FLAGS laterStage = PIPELINE_STAGE_FLAGS::NONE;
+				const auto graphicsPrimitivePipelineOrders = std::array{ PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT, PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT, PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT, PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT, PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT, PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT, PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT };
+				for (auto iter = graphicsPrimitivePipelineOrders.rbegin(); iter < graphicsPrimitivePipelineOrders.rend(); iter++) // walk last-to-first so `laterStage` holds everything after the current stage
+				{
+					const auto pipelineStage = *iter;
+					add(pipelineStage, laterStage);
+					laterStage |= pipelineStage;
+				}
+				// fragment density processing precedes early fragment tests, so it inherits that stage's whole later-stage mask (mirrors allPreviousStages)
+				add(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT, data[hlsl::findLSB(static_cast<size_t>(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT))]);
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+
+			constexpr void add(PIPELINE_STAGE_FLAGS stageFlag, PIPELINE_STAGE_FLAGS laterStageFlags) // ORs laterStageFlags into the entry of stageFlag's lowest set bit
+			{
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(stageFlag));
+				data[bitIx] |= laterStageFlags;
+			}
+
+			PIPELINE_STAGE_FLAGS data[std::numeric_limits<std::underlying_type_t<PIPELINE_STAGE_FLAGS>>::digits] = {};
+	};
+
+	constexpr PerStageLaterStages bitToAccess = {};
+
+	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
+	while (bool(stages.value)) // peel off one set bit per iteration
+	{
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(stages.value));
+		retval |= bitToAccess[bitIx];
+		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
+	}
+
+	return retval;
+}
+
+core::bitflag<ACCESS_FLAGS> allAccessesFromStages(core::bitflag<PIPELINE_STAGE_FLAGS> stages) // union of the access masks recorded for every stage bit set in `stages`
+{
+	struct PerStageAccesses // compile-time table: bit index of a stage -> accesses recorded for that stage
+	{
+		public:
+			constexpr PerStageAccesses()
+			{
+        init(PIPELINE_STAGE_FLAGS::HOST_BIT,ACCESS_FLAGS::HOST_READ_BIT|ACCESS_FLAGS::HOST_WRITE_BIT);
+
+        constexpr auto TransferRW = ACCESS_FLAGS::TRANSFER_READ_BIT|ACCESS_FLAGS::TRANSFER_WRITE_BIT;
+        init(PIPELINE_STAGE_FLAGS::COPY_BIT,TransferRW);
+        init(PIPELINE_STAGE_FLAGS::CLEAR_BIT,ACCESS_FLAGS::TRANSFER_WRITE_BIT);
+
+        constexpr auto MicromapRead = ACCESS_FLAGS::SHADER_READ_BITS;//|ACCESS_FLAGS::MICROMAP_READ_BIT;
+//                init(PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT,MicromapRead|ACCESS_FLAGS::MICROMAP_WRITE_BIT); // can micromaps be built indirectly?
+        
+        constexpr auto AccelerationStructureRW = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT|ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT;
+        init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT,TransferRW|AccelerationStructureRW);
+        init(PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT|MicromapRead|AccelerationStructureRW);
+
+        init(PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT,ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT|ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT);
+        init(PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT,ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT);
+        init(PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT,ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT);
+
+        constexpr auto ShaderRW = ACCESS_FLAGS::SHADER_READ_BITS|ACCESS_FLAGS::SHADER_WRITE_BITS;
+        constexpr auto AllShaderStagesRW = ShaderRW^(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT); // XOR toggles the two bits; presumably both are part of SHADER_READ_BITS so this strips them - TODO confirm. They are OR-ed back in for the fragment and ray tracing stages below
+        init(PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT,AllShaderStagesRW);
+        init(PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT,ACCESS_FLAGS::INDEX_READ_BIT);
+        init(PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT,ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT);
+        init(PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT,AllShaderStagesRW);
+        init(PIPELINE_STAGE_FLAGS::TESSELLATION_CONTROL_SHADER_BIT,AllShaderStagesRW);
+        init(PIPELINE_STAGE_FLAGS::TESSELLATION_EVALUATION_SHADER_BIT,AllShaderStagesRW);
+        init(PIPELINE_STAGE_FLAGS::GEOMETRY_SHADER_BIT,AllShaderStagesRW);
+//                init(PIPELINE_STAGE_FLAGS::TASK_SHADER_BIT,AllShaderStagesRW);
+//                init(PIPELINE_STAGE_FLAGS::MESH_SHADER_BIT,AllShaderStagesRW);
+        init(PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT,ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT);
+        init(PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT,ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT);
+        constexpr auto DepthStencilRW = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT|ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+        init(PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT,DepthStencilRW);
+        init(PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT);
+        init(PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,DepthStencilRW);
+        init(PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT);
+
+        init(PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT,AllShaderStagesRW|ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT);
+
+        init(PIPELINE_STAGE_FLAGS::RESOLVE_BIT,TransferRW);
+        init(PIPELINE_STAGE_FLAGS::BLIT_BIT,TransferRW);
+
+//                init(PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT,ACCESS_FLAGS::VIDEO_DECODE_READ_BIT|ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT);
+//                init(PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT,ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT|ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT);
+//                init(PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT,ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT|ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT);
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+				
+			constexpr void init(PIPELINE_STAGE_FLAGS stageFlag, ACCESS_FLAGS accessFlags) // overwrites (does not OR into) the entry of stageFlag's lowest set bit
+			{
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(stageFlag));
+				data[bitIx] = accessFlags;
+			}
+
+			ACCESS_FLAGS data[32] = {}; // stages never passed to init() keep their value-initialized entry
+	};
+	constexpr PerStageAccesses bitToAccess = {};
+
+	// TODO: add logically later or previous stages to make sure all other accesses remain valid
+	// or ideally expand the stages with `allPreviousStages` / `allLaterStages` before calling `allAccessesFromStages`
+
+	core::bitflag<ACCESS_FLAGS> retval = ACCESS_FLAGS::NONE;
+	while (bool(stages.value)) // peel off one set bit per iteration
+	{
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(stages.value));
+		retval |= bitToAccess[bitIx];
+		stages ^= static_cast<PIPELINE_STAGE_FLAGS>(0x1u<<bitIx);
+	}
+
+	return retval;
+}
+
+core::bitflag<PIPELINE_STAGE_FLAGS> allStagesFromAccesses(core::bitflag<ACCESS_FLAGS> accesses) // union of the stage masks recorded for every access bit set in `accesses`
+{
+	struct PerAccessStages // compile-time table: bit index of an access -> stages recorded for that access
+	{
+		public:
+			constexpr PerAccessStages()
+			{
+        init(ACCESS_FLAGS::HOST_READ_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
+        init(ACCESS_FLAGS::HOST_WRITE_BIT,PIPELINE_STAGE_FLAGS::HOST_BIT);
+
+        init(ACCESS_FLAGS::TRANSFER_READ_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS^PIPELINE_STAGE_FLAGS::CLEAR_BIT); // XOR toggles CLEAR_BIT; presumably it is part of ALL_TRANSFER_BITS so this strips it - TODO confirm
+        init(ACCESS_FLAGS::TRANSFER_WRITE_BIT,PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS);
+
+        constexpr auto MicromapAccelerationStructureBuilds = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT;
+//                init(ACCESS_FLAGS::MICROMAP_READ_BIT,MicromapAccelerationStructureBuilds);
+//                init(ACCESS_FLAGS::MICROMAP_WRITE_BIT,PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
+        
+        constexpr auto AllShaders = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT|PIPELINE_STAGE_FLAGS::PRE_RASTERIZATION_SHADERS_BITS|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT;
+        constexpr auto AccelerationStructureOperations = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT;
+        init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT,AccelerationStructureOperations|AllShaders);
+        init(ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT,AccelerationStructureOperations);
+
+        init(ACCESS_FLAGS::COMMAND_PREPROCESS_READ_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
+        init(ACCESS_FLAGS::COMMAND_PREPROCESS_WRITE_BIT,PIPELINE_STAGE_FLAGS::COMMAND_PREPROCESS_BIT);
+        init(ACCESS_FLAGS::CONDITIONAL_RENDERING_READ_BIT,PIPELINE_STAGE_FLAGS::CONDITIONAL_RENDERING_BIT);
+        init(ACCESS_FLAGS::INDIRECT_COMMAND_READ_BIT,PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::DISPATCH_INDIRECT_COMMAND_BIT);
+
+        init(ACCESS_FLAGS::UNIFORM_READ_BIT,AllShaders);
+        init(ACCESS_FLAGS::SAMPLED_READ_BIT,AllShaders);//|PIPELINE_STAGE_FLAGS::MICROMAP_BUILD_BIT);
+        init(ACCESS_FLAGS::STORAGE_READ_BIT,AllShaders|MicromapAccelerationStructureBuilds);
+        init(ACCESS_FLAGS::STORAGE_WRITE_BIT,AllShaders);
+
+        init(ACCESS_FLAGS::INDEX_READ_BIT,PIPELINE_STAGE_FLAGS::INDEX_INPUT_BIT);
+        init(ACCESS_FLAGS::VERTEX_ATTRIBUTE_READ_BIT,PIPELINE_STAGE_FLAGS::VERTEX_ATTRIBUTE_INPUT_BIT);
+
+        init(ACCESS_FLAGS::FRAGMENT_DENSITY_MAP_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_DENSITY_PROCESS_BIT);
+        init(ACCESS_FLAGS::SHADING_RATE_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::SHADING_RATE_ATTACHMENT_BIT);
+        constexpr auto FragmentTests = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT;
+        init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT,FragmentTests);
+        init(ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,FragmentTests);
+        init(ACCESS_FLAGS::INPUT_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT);
+        init(ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
+        init(ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT,PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT);
+
+        init(ACCESS_FLAGS::SHADER_BINDING_TABLE_READ_BIT,PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT);
+
+//                init(ACCESS_FLAGS::VIDEO_DECODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
+//                init(ACCESS_FLAGS::VIDEO_DECODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_DECODE_BIT);
+//                init(ACCESS_FLAGS::VIDEO_ENCODE_READ_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
+//                init(ACCESS_FLAGS::VIDEO_ENCODE_WRITE_BIT,PIPELINE_STAGE_FLAGS::VIDEO_ENCODE_BIT);
+//                init(ACCESS_FLAGS::OPTICAL_FLOW_READ_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
+//                init(ACCESS_FLAGS::OPTICAL_FLOW_WRITE_BIT,PIPELINE_STAGE_FLAGS::OPTICAL_FLOW_BIT);
+			}
+			constexpr const auto& operator[](const size_t ix) const {return data[ix];}
+
+		private:
+			constexpr void init(ACCESS_FLAGS accessFlags, PIPELINE_STAGE_FLAGS stageFlags) // overwrites (does not OR into) the entry of accessFlags' lowest set bit
+			{
+				const auto bitIx = hlsl::findLSB(static_cast<size_t>(accessFlags));
+				data[bitIx] = stageFlags;
+			}
+
+			PIPELINE_STAGE_FLAGS data[32] = {}; // accesses never passed to init() keep their value-initialized entry
+	};
+	constexpr PerAccessStages bitToStage = {};
+
+	core::bitflag<PIPELINE_STAGE_FLAGS> retval = PIPELINE_STAGE_FLAGS::NONE;
+	while (bool(accesses.value)) // peel off one set bit per iteration
+	{
+		const auto bitIx = hlsl::findLSB(static_cast<size_t>(accesses.value));
+		retval |= bitToStage[bitIx];
+		accesses ^= static_cast<ACCESS_FLAGS>(0x1u<<bitIx);
+	}
+
+	return retval;
+}
+}
+
diff --git a/src/nbl/asset/utils/CGeometryCreator.cpp b/src/nbl/asset/utils/CGeometryCreator.cpp
index 11a05fa90c..85fcf7bf77 100644
--- a/src/nbl/asset/utils/CGeometryCreator.cpp
+++ b/src/nbl/asset/utils/CGeometryCreator.cpp
@@ -5,16 +5,163 @@
 
 #include "nbl/asset/utils/CGeometryCreator.h"
 #include "nbl/builtin/hlsl/tgmath.hlsl"
+#include "nbl/builtin/hlsl/math/linalg/transform.hlsl"
 
-#include <iostream>
-#include <iomanip>
 #include <cmath>
 #include <cstdint>
 
-
 namespace nbl::asset
 {
 
+namespace // file-local constants and helpers for the geometry builders
+{
+using snorm_normal_t = hlsl::vector<int8_t, 4>; // one normal as 4 signed bytes; stored as EF_R8G8B8A8_SNORM by createSnormNormalView
+constexpr int8_t snorm_one = std::numeric_limits<int8_t>::max(); // largest int8_t value
+constexpr int8_t snorm_neg_one = std::numeric_limits<int8_t>::min(); // smallest int8_t value
+constexpr auto snorm_positive_x = hlsl::vector<int8_t, 4>(snorm_one, 0, 0, 0); // axis-aligned unit directions in the encoding above
+constexpr auto snorm_negative_x = hlsl::vector<int8_t, 4>(snorm_neg_one, 0, 0, 0);
+constexpr auto snorm_positive_y = hlsl::vector<int8_t, 4>(0, snorm_one, 0, 0);
+constexpr auto snorm_negative_y = hlsl::vector<int8_t, 4>(0, snorm_neg_one, 0, 0);
+constexpr auto snorm_positive_z = hlsl::vector<int8_t, 4>(0, 0, snorm_one, 0);
+constexpr auto snorm_negative_z = hlsl::vector<int8_t, 4>(0, 0, snorm_neg_one, 0);
+
+constexpr auto snorm_all_ones = hlsl::vector<int8_t, 4>(snorm_one, snorm_one, snorm_one, snorm_one);
+
+template <typename ElementT> // ElementT: storage type of one UV channel
+  requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
+constexpr E_FORMAT get_uv_format() // two-channel UNORM format whose channel width matches ElementT
+{
+  if constexpr(std::is_same_v<ElementT, uint8_t>)
+  {
+    return EF_R8G8_UNORM;
+  } else
+  {
+    return EF_R16G16_UNORM;
+  }
+}
+}
+
+template <typename ElementT> // ElementT: storage type of one UV channel (uint8_t or uint16_t)
+	requires(std::is_same_v<ElementT, uint8_t> || std::is_same_v<ElementT, uint16_t>)
+static ICPUPolygonGeometry::SDataView createUvView(size_t vertexCount) // creates a buffer for `vertexCount` two-channel UVs and returns the view describing it; the buffer contents are not written here
+{
+	const auto elementCount = 2; // u and v
+	const auto attrSize = sizeof(ElementT) * elementCount;
+	auto buff = ICPUBuffer::create({{attrSize * vertexCount,IBuffer::EUF_NONE}});
+	hlsl::shapes::AABB<4, ElementT> aabb; // encoded range: [0, max] on the first two components, 0 on the last two
+	aabb.minVx = hlsl::vector<ElementT, 4>(0,0,0,0);
+	aabb.maxVx = hlsl::vector<ElementT, 4>(std::numeric_limits<ElementT>::max(), std::numeric_limits<ElementT>::max(), 0, 0);
+
+	auto retval = ICPUPolygonGeometry::SDataView{
+		.composed = {
+			.stride = attrSize,
+		},
+		.src = {
+			.offset = 0,
+			.size = buff->getSize(),
+			.buffer = std::move(buff),
+		}
+	};
+
+	if constexpr(std::is_same_v<ElementT, uint8_t>) // the range member, format and range format depend on the channel width
+	{
+		retval.composed.encodedDataRange.u8 = aabb;
+		retval.composed.format = get_uv_format<ElementT>();
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM;
+	}
+	else if constexpr(std::is_same_v<ElementT, uint16_t>)
+	{
+		retval.composed.encodedDataRange.u16 = aabb;
+		retval.composed.format = get_uv_format<ElementT>();
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM;
+	}
+
+	return retval;
+}
+
+template <typename IndexT> // IndexT: storage type of one index (uint16_t or uint32_t)
+	requires(std::is_same_v<IndexT, uint16_t> || std::is_same_v<IndexT, uint32_t>)
+static ICPUPolygonGeometry::SDataView createIndexView(size_t indexCount, size_t maxIndex) // creates an index buffer for `indexCount` indices and returns the view describing it; the indices themselves are not written here
+{
+	
+	const auto bytesize = sizeof(IndexT) * indexCount;
+	auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
+
+	hlsl::shapes::AABB<4,IndexT> aabb; // only component 0 is assigned below
+	aabb.minVx[0] = 0;
+	aabb.maxVx[0] = maxIndex; // NOTE(review): size_t is narrowed to IndexT here without a range check - confirm callers keep maxIndex representable
+
+	auto retval = ICPUPolygonGeometry::SDataView{
+		.composed = {
+			.stride = sizeof(IndexT),
+		},
+		.src = {.offset = 0,.size = bytesize,.buffer = std::move(indices)},
+	};
+
+	if constexpr(std::is_same_v<IndexT, uint16_t>) // the range member, format and range format depend on the index width
+	{
+		retval.composed.encodedDataRange.u16 = aabb;
+		retval.composed.format = EF_R16_UINT;
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U16;
+	}
+	else if constexpr(std::is_same_v<IndexT, uint32_t>)
+	{
+		retval.composed.encodedDataRange.u32 = aabb;
+		retval.composed.format = EF_R32_UINT;
+		retval.composed.rangeFormat = IGeometryBase::EAABBFormat::U32;
+	}
+
+	return retval;
+}
+
+template <size_t ElementCountV = 3> // ElementCountV: number of float components per position (1 to 4)
+	requires(ElementCountV > 0 && ElementCountV <= 4)
+static ICPUPolygonGeometry::SDataView createPositionView(size_t positionCount, const hlsl::shapes::AABB<4, hlsl::float32_t>& aabb) // creates a buffer for `positionCount` float positions and returns the view describing it, with `aabb` stored as the encoded data range
+{
+	using position_t = hlsl::vector<hlsl::float32_t, ElementCountV>;
+	constexpr auto AttrSize = sizeof(position_t);
+	auto buff = ICPUBuffer::create({AttrSize * positionCount,IBuffer::EUF_NONE});
+
+	constexpr auto format = []() // picks the 32-bit float format with ElementCountV channels
+	{
+		if constexpr (ElementCountV == 1) return EF_R32_SFLOAT;
+		if constexpr (ElementCountV == 2) return EF_R32G32_SFLOAT;
+		if constexpr (ElementCountV == 3) return EF_R32G32B32_SFLOAT;
+		if constexpr (ElementCountV == 4) return EF_R32G32B32A32_SFLOAT;
+	}();
+
+	return {
+		.composed = {
+			.encodedDataRange = {.f32 = aabb},
+			.stride = AttrSize,
+			.format = format,
+			.rangeFormat = IGeometryBase::EAABBFormat::F32
+		},
+		.src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)}
+	};
+}
+
+static ICPUPolygonGeometry::SDataView createSnormNormalView(size_t normalCount, const hlsl::shapes::AABB<4, int8_t>& aabb) // creates a buffer for `normalCount` snorm_normal_t normals and returns the view describing it, with `aabb` stored as the encoded data range
+{
+	constexpr auto AttrSize = sizeof(snorm_normal_t);
+	auto buff = ICPUBuffer::create({AttrSize * normalCount,IBuffer::EUF_NONE});
+	return {
+		.composed = {
+			.encodedDataRange = {.s8=aabb},
+			.stride = AttrSize,
+			.format = EF_R8G8B8A8_SNORM,
+			.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
+		},
+		.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
+	};
+}
+
+static void encodeUv(hlsl::vector<uint16_t, 2>* uvDst, hlsl::float32_t2 uvSrc) // packs `uvSrc` with hlsl::packUnorm2x16 and stores the result in `*uvDst`
+{
+	uint32_t u32_uv = hlsl::packUnorm2x16(uvSrc);
+	memcpy(uvDst, &u32_uv, sizeof(uint16_t) * 2); // copies the 4 packed bytes over the two uint16_t components
+}
+
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const hlsl::float32_t3 size) const
 {
 	using namespace hlsl;
@@ -22,13 +169,15 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
 	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
+	constexpr auto CubeUniqueVertices = 24;
+
 	// Create indices
 	using index_t = uint16_t;
 	{
-		constexpr auto IndexCount = 36u;
-		constexpr auto bytesize = sizeof(index_t) * IndexCount;
-		auto indices = ICPUBuffer::create({bytesize,IBuffer::EUF_INDEX_BUFFER_BIT});
-		auto u = reinterpret_cast<index_t*>(indices->getPointer());
+		constexpr auto IndexCount = 36;
+		constexpr auto MaxIndex = CubeUniqueVertices - 1;
+		auto indexView = createIndexView<index_t>(IndexCount, MaxIndex);
+		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
 		for (uint32_t i=0u; i<6u; ++i)
 		{
 			u[i*6+0] = 4*i+0;
@@ -38,84 +187,42 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 			u[i*6+4] = 4*i+2;
 			u[i*6+5] = 4*i+3;
 		}
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = 23;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=bytesize,.buffer=std::move(indices)}
-		});
+		retval->setIndexView(std::move(indexView));
 	}
 
-	constexpr auto CubeUniqueVertices = 24;
 
 	// Create vertex attributes with NONE usage because we have no clue how they'll be used
 	hlsl::float32_t3* positions;
+
 	// for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats
-	hlsl::vector<int8_t,4>* normals;
-	hlsl::vector<uint8_t,2>* uvs;
+	snorm_normal_t* normals;
+
+	using uv_element_t = uint8_t;
+	constexpr auto UnityUV = std::numeric_limits<uv_element_t>::max();
+	hlsl::vector<uv_element_t,2>* uvs;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4,float32_t> aabb;
 			aabb.maxVx = float32_t4(size*0.5f,0.f);
-			aabb.minVx = -aabb.maxVx;
-			retval->visitAABB([aabb](auto& ref)->void
-				{
-					ref.minVx = hlsl::trunc(aabb.minVx);
-					ref.maxVx = hlsl::trunc(aabb.maxVx);
-				}
-			);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32=aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32B32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)}
-			});
+			aabb.minVx = - aabb.maxVx;
+
+			auto positionView = createPositionView(CubeUniqueVertices, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*normals));
-			auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4,int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.maxVx = snorm_all_ones;
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8B8A8_SNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto normalView = createSnormNormalView(CubeUniqueVertices, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
+
 		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({AttrSize*CubeUniqueVertices,IBuffer::EUF_NONE});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4,uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t,4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t,4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto uvView = createUvView<uv_element_t>(CubeUniqueVertices);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -160,30 +267,31 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 
 	//
 	{
-		const hlsl::vector<int8_t, 3> norm[6] =
+		const snorm_normal_t norm[6] =
 		{
-			hlsl::vector<int8_t,3>(0, 0, 127),
-			hlsl::vector<int8_t,3>(127, 0, 0),
-			hlsl::vector<int8_t,3>(0, 0,-127),
-			hlsl::vector<int8_t,3>(-127, 0, 0),
-			hlsl::vector<int8_t,3>(0, 127, 0),
-			hlsl::vector<int8_t,3>(0,-127, 0)
+			snorm_positive_z,
+			snorm_positive_x,
+			snorm_negative_z,
+			snorm_negative_x,
+			snorm_positive_y,
+			snorm_negative_y
 		};
-		const hlsl::vector<uint8_t, 2> uv[4] =
+		const hlsl::vector<uv_element_t, 2> uv[4] =
 		{
-			hlsl::vector<uint8_t,2>(  0,255),
-			hlsl::vector<uint8_t,2>(255,255),
-			hlsl::vector<uint8_t,2>(255,  0),
-			hlsl::vector<uint8_t,2>(  0,  0)
+			hlsl::vector<uv_element_t,2>(  0, UnityUV),
+			hlsl::vector<uv_element_t,2>(UnityUV, UnityUV),
+			hlsl::vector<uv_element_t,2>(UnityUV,  0),
+			hlsl::vector<uv_element_t,2>(  0,  0)
 		};
-		for (size_t f=0ull; f<6ull; ++f)
+
+		for (size_t f = 0ull; f < 6ull; ++f)
 		{
-			const size_t v = f*4ull;
+			const size_t v = f * 4ull;
 
-			for (size_t i=0ull; i<4ull; ++i)
+			for (size_t i = 0ull; i < 4ull; ++i)
 			{
-				normals[v+i] = vector<int8_t,4>(norm[f],0);
-				uvs[v+i] = uv[i];
+				normals[v + i] = snorm_normal_t(norm[f]);
+				uvs[v + i] = uv[i];
 			}
 		}
 	}
@@ -192,121 +300,12 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCube(const h
 	return retval;
 }
 
-#if 0
-
-/*
-	a cylinder, a cone and a cross
-	point up on (0,1.f, 0.f )
-*/
-core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createArrow(
-	const uint32_t tesselationCylinder,
-	const uint32_t tesselationCone,
-	const float height,
-	const float cylinderHeight,
-	const float width0,
-	const float width1,
-	const video::SColor vtxColor0,
-	const video::SColor vtxColor1
-) const
+core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float radius,
+				uint32_t polyCountX, uint32_t polyCountY, CQuantNormalCache* const quantNormalCacheOverride) const
 {
-    assert(height > cylinderHeight);
-
-    auto cylinder = createCylinderMesh(width0, cylinderHeight, tesselationCylinder, vtxColor0);
-    auto cone = createConeMesh(width1, height-cylinderHeight, tesselationCone, vtxColor1, vtxColor1);
-
-	auto cylinderVertices = reinterpret_cast<CylinderVertex*>(cylinder.bindings[0].buffer->getPointer());
-	auto coneVertices = reinterpret_cast<ConeVertex*>(cone.bindings[0].buffer->getPointer());
-
-	auto cylinderIndecies = reinterpret_cast<uint16_t*>(cylinder.indexBuffer.buffer->getPointer());
-	auto coneIndecies = reinterpret_cast<uint16_t*>(cone.indexBuffer.buffer->getPointer());
-
-	const auto cylinderVertexCount = cylinder.bindings[0].buffer->getSize() / sizeof(CylinderVertex);
-	const auto coneVertexCount = cone.bindings[0].buffer->getSize() / sizeof(ConeVertex);
-	const auto newArrowVertexCount = cylinderVertexCount + coneVertexCount;
-
-	const auto cylinderIndexCount = cylinder.indexBuffer.buffer->getSize() / sizeof(uint16_t);
-	const auto coneIndexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t);
-	const auto newArrowIndexCount = cylinderIndexCount + coneIndexCount;
-
-	for (auto i = 0ull; i < coneVertexCount; ++i)
-	{
-		core::vector3df_SIMD newPos = coneVertices[i].pos;
-		newPos.rotateYZByRAD(-1.5707963268);
-
-		for (auto c = 0; c < 3; ++c)
-			coneVertices[i].pos[c] = newPos[c];
-	}
-
-	auto newArrowVertexBuffer = asset::ICPUBuffer::create({ newArrowVertexCount * sizeof(ArrowVertex) });
-	newArrowVertexBuffer->setUsageFlags(newArrowVertexBuffer->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	auto newArrowIndexBuffer = asset::ICPUBuffer::create({ newArrowIndexCount * sizeof(uint16_t) });
-	newArrowIndexBuffer->setUsageFlags(newArrowIndexBuffer->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-
-	for (auto z = 0ull; z < newArrowVertexCount; ++z)
-	{
-		auto arrowVertex = reinterpret_cast<ArrowVertex*>(newArrowVertexBuffer->getPointer()) + z;
-
-		if (z < cylinderVertexCount)
-		{
-			auto cylinderVertex = (cylinderVertices + z);
-			memcpy(arrowVertex, cylinderVertex, sizeof(ArrowVertex));
-		}
-		else
-		{
-			auto coneVertex = (coneVertices + z - cylinderVertexCount);
-			memcpy(arrowVertex, coneVertex, offsetof(ConeVertex, normal)); // copy position and color
-			arrowVertex->uv[0] = 0;
-			arrowVertex->uv[1] = 0;
-			arrowVertex->normal = coneVertex->normal;
-		}
-	}
-
-	{
-		auto ArrowIndices = reinterpret_cast<uint16_t*>(newArrowIndexBuffer->getPointer());
-		auto newConeIndices = (ArrowIndices + cylinderIndexCount);
-
-		memcpy(ArrowIndices, cylinderIndecies, sizeof(uint16_t) * cylinderIndexCount);
-		memcpy(newConeIndices, coneIndecies, sizeof(uint16_t) * coneIndexCount);
-
-		for (auto i = 0ull; i < coneIndexCount; ++i)
-			*(newConeIndices + i) += cylinderVertexCount;
-	}
-
-	return_type arrow;
-
-	constexpr size_t vertexSize = sizeof(ArrowVertex);
-	arrow.inputParams = 
-	{ 0b1111u,0b1u,
-		{
-			{0u,EF_R32G32B32_SFLOAT,offsetof(ArrowVertex,pos)},
-			{0u,EF_R8G8B8A8_UNORM,offsetof(ArrowVertex,color)},
-			{0u,EF_R32G32_SFLOAT,offsetof(ArrowVertex,uv)},
-			{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ArrowVertex,normal)}
-		},
-		{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} 
-	};
-
-	arrow.bindings[0] = { 0, std::move(newArrowVertexBuffer) }; 
-	arrow.indexBuffer = { 0, std::move(newArrowIndexBuffer) };
-	arrow.indexCount = newArrowIndexCount;
-	arrow.indexType = EIT_16BIT;
-
-    return arrow;
-}
+	using namespace hlsl;
 
-/* A sphere with proper normals and texture coords */
-core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float radius, uint32_t polyCountX, uint32_t polyCountY, IMeshManipulator* const meshManipulatorOverride) const
-{
-	// we are creating the sphere mesh here.
-	return_type retval;
-	constexpr size_t vertexSize = sizeof(CGeometryCreator::SphereVertex);
-	CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache();
-	retval.inputParams = { 0b1111u,0b1u,{
-											{0u,EF_R32G32B32_SFLOAT,offsetof(SphereVertex,pos)},
-											{0u,EF_R8G8B8A8_UNORM,offsetof(SphereVertex,color)},
-											{0u,EF_R32G32_SFLOAT,offsetof(SphereVertex,uv)},
-											{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(SphereVertex,normal)}
-										},{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} };
+	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
 
 	if (polyCountX < 2)
 		polyCountX = 2;
@@ -314,15 +313,21 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		polyCountY = 2;
 
 	const uint32_t polyCountXPitch = polyCountX + 1; // get to same vertex on next level
+	const size_t vertexCount = (polyCountXPitch * polyCountY) + 2;
 
-	retval.indexCount = (polyCountX * polyCountY) * 6;
-	auto indices = asset::ICPUBuffer::create({ sizeof(uint32_t) * retval.indexCount });
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
 	// Create indices
 	{
+		using index_t = uint32_t;
+
+		const auto indexCount = (polyCountX * polyCountY) * 6;
+		auto indexView = createIndexView<index_t>(indexCount, vertexCount - 1);
+		auto indexPtr = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
+
 		uint32_t level = 0;
 		size_t indexAddIx = 0;
-		uint32_t* indexPtr = (uint32_t*)indices->getPointer();
 		for (uint32_t p1 = 0; p1 < polyCountY - 1; ++p1)
 		{
 			//main quads, top to bottom
@@ -378,23 +383,48 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		indexPtr[indexAddIx++] = polyCountSqM1 + polyCountX - 1;
 		indexPtr[indexAddIx++] = polyCountSqM1;
 		indexPtr[indexAddIx++] = polyCountSq1;
+
+		retval->setIndexView(std::move(indexView));
+
 	}
-	indices->setUsageFlags(indices->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	retval.indexBuffer = {0ull, std::move(indices)};
 
-	// handle vertices
+	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
+
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
+
+	snorm_normal_t* normals;
+
+	using uv_element_t = uint16_t;
+	constexpr auto UnityUV = std::numeric_limits<uv_element_t>::max();
+
+	hlsl::vector<uv_element_t, 2>* uvs;
 	{
-		size_t vertexSize = 3 * 4 + 4 + 2 * 4 + 4;
-		size_t vertexCount = (polyCountXPitch * polyCountY) + 2;
-		auto vtxBuf = asset::ICPUBuffer::create({ vertexCount * vertexSize });
-		auto* tmpMem = reinterpret_cast<uint8_t*>(vtxBuf->getPointer());
-		for (size_t i = 0; i < vertexCount; i++)
 		{
-			tmpMem[i * vertexSize + 3 * 4 + 0] = 255;
-			tmpMem[i * vertexSize + 3 * 4 + 1] = 255;
-			tmpMem[i * vertexSize + 3 * 4 + 2] = 255;
-			tmpMem[i * vertexSize + 3 * 4 + 3] = 255;
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, radius, 0.0f);
+			aabb.minVx = float32_t4(-radius, -radius, -radius, 0.0f);
+			auto positionView = createPositionView(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
+		}
+		{
+			shapes::AABB<4, int8_t> aabb;
+			aabb.maxVx = snorm_all_ones;
+			aabb.minVx = -aabb.maxVx;
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
+		{
+			auto uvView = createUvView<uv_element_t>(vertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
+		}
+	}
+
+	// fill vertices
+	{
 		// calculate the angle which separates all points in a circle
 		const float AngleX = 2 * core::PI<float>() / polyCountX;
 		const float AngleY = core::PI<float>() / polyCountY;
@@ -404,9 +434,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 		// we don't start at 0.
 
 		double ay = 0;//AngleY / 2;
-
-		using quant_normal_t = CQuantNormalCache::value_type_t<EF_A2B10G10R10_SNORM_PACK32>;
-		uint8_t* tmpMemPtr = tmpMem;
+		auto vertex_i = 0;
 		for (uint32_t y = 0; y < polyCountY; ++y)
 		{
 			ay += AngleY;
@@ -414,7 +442,7 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 			axz = 0;
 
 			// calculate the necessary vertices without the doubled one
-			uint8_t* oldTmpMemPtr = tmpMemPtr;
+			const auto old_vertex_i = vertex_i;
 			for (uint32_t xz = 0; xz < polyCountX; ++xz)
 			{
 				// calculate points position
@@ -423,9 +451,8 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 					static_cast<float>(cos(ay)),
 					static_cast<float>(sin(axz) * sinay));
 				// for spheres the normal is the position
-				core::vectorSIMDf normal(&pos.X);
-				normal.makeSafe3D();
-				quant_normal_t quantizedNormal = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(normal);
+				const auto normal = pos;
+				const auto quantizedNormal = quantNormalCache->quantize<NormalCacheFormat>(normal);
 				pos *= radius;
 
 				// calculate texture coordinates via sphere mapping
@@ -433,229 +460,235 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createSphere(float
 				float tu = 0.5f;
 				//if (y==0)
 				//{
-				if (normal.Y != -1.0f && normal.Y != 1.0f)
-					tu = static_cast<float>(acos(core::clamp(normal.X / sinay, -1.0, 1.0)) * 0.5 * core::RECIPROCAL_PI<double>());
-				if (normal.Z < 0.0f)
+				if (normal.y != -1.0f && normal.y != 1.0f)
+					tu = static_cast<float>(acos(core::clamp(normal.x / sinay, -1.0, 1.0)) * 0.5 * numbers::inv_pi<float32_t>);
+				if (normal.z < 0.0f)
 					tu = 1 - tu;
 				//}
 				//else
 					//tu = ((float*)(tmpMem+(i-polyCountXPitch)*vertexSize))[4];
 
-				((float*)tmpMemPtr)[0] = pos.X;
-				((float*)tmpMemPtr)[1] = pos.Y;
-				((float*)tmpMemPtr)[2] = pos.Z;
-				((float*)tmpMemPtr)[4] = tu;
-				((float*)tmpMemPtr)[5] = static_cast<float>(ay * core::RECIPROCAL_PI<double>());
-				((quant_normal_t*)tmpMemPtr)[6] = quantizedNormal;
-				static_assert(sizeof(quant_normal_t)==4u);
+				positions[vertex_i] = pos;
+				encodeUv(uvs + vertex_i, float32_t2(tu, static_cast<float>(ay* numbers::inv_pi<float32_t>)));
+				memcpy(normals + vertex_i, &quantizedNormal, sizeof(quantizedNormal));
 
-				tmpMemPtr += vertexSize;
+				vertex_i++;
 				axz += AngleX;
 			}
 			// This is the doubled vertex on the initial position
 
-			((float*)tmpMemPtr)[0] = ((float*)oldTmpMemPtr)[0];
-			((float*)tmpMemPtr)[1] = ((float*)oldTmpMemPtr)[1];
-			((float*)tmpMemPtr)[2] = ((float*)oldTmpMemPtr)[2];
-			((float*)tmpMemPtr)[4] = 1.f;
-			((float*)tmpMemPtr)[5] = ((float*)oldTmpMemPtr)[5];
-			((uint32_t*)tmpMemPtr)[6] = ((uint32_t*)oldTmpMemPtr)[6];
-			tmpMemPtr += vertexSize;
+			positions[vertex_i] = positions[old_vertex_i];
+			uvs[vertex_i] = { UnityUV, uvs[old_vertex_i].y };
+			normals[vertex_i] = normals[old_vertex_i];
+
+			vertex_i++;
 		}
 
 		// the vertex at the top of the sphere
-		((float*)tmpMemPtr)[0] = 0.f;
-		((float*)tmpMemPtr)[1] = radius;
-		((float*)tmpMemPtr)[2] = 0.f;
-		((float*)tmpMemPtr)[4] = 0.5f;
-		((float*)tmpMemPtr)[5] = 0.f;
-		((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::vectorSIMDf(0.f, 1.f, 0.f));
+		positions[vertex_i] = { 0.f, radius, 0.f };
+		uvs[vertex_i] = { UnityUV / 2, 0 }; // (u,v) = (0.5, 0): same values the replaced code stored, and the mirror of the bottom pole's (0.5, 1)
+		const auto quantizedTopNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::float32_t3(0.f, 1.f, 0.f)); // pole normal points straight up
+		memcpy(normals + vertex_i, &quantizedTopNormal, sizeof(quantizedTopNormal)); // raw copy of the quantized value into the normal slot
 
 		// the vertex at the bottom of the sphere
-		tmpMemPtr += vertexSize;
-		((float*)tmpMemPtr)[0] = 0.f;
-		((float*)tmpMemPtr)[1] = -radius;
-		((float*)tmpMemPtr)[2] = 0.f;
-		((float*)tmpMemPtr)[4] = 0.5f;
-		((float*)tmpMemPtr)[5] = 1.f;
-		((quant_normal_t*)tmpMemPtr)[6] = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::vectorSIMDf(0.f, -1.f, 0.f));
-
-		// recalculate bounding box
-		core::aabbox3df BoundingBox;
-		BoundingBox.reset(float32_t3(radius));
-		BoundingBox.addInternalPoint(-radius, -radius, -radius);
-
-		// set vertex buffer
-		vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-		retval.bindings[0] = { 0ull,std::move(vtxBuf) };
-		retval.indexType = asset::EIT_32BIT;
-		retval.bbox = BoundingBox;
+		vertex_i++;
+		positions[vertex_i] = { 0.f, -radius, 0.f };
+		uvs[vertex_i] = { UnityUV / 2, UnityUV};
+		const auto quantizedBottomNormal = quantNormalCache->quantize<NormalCacheFormat>(hlsl::float32_t3(0.f, -1.f, 0.f));
+		memcpy(normals + vertex_i, &quantizedBottomNormal, sizeof(quantizedBottomNormal));
 	}
 
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
-/* A cylinder with proper normals and texture coords */
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCylinder(
 	float radius, float length,
-	uint32_t tesselation, const video::SColor& color, IMeshManipulator* const meshManipulatorOverride
-) const
+	uint16_t tesselation, CQuantNormalCache* const quantNormalCacheOverride) const
 {
-	return_type retval;
-	constexpr size_t vertexSize = sizeof(CGeometryCreator::CylinderVertex);
-	CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache();
-	retval.inputParams = { 0b1111u,0b1u,{
-											{0u,EF_R32G32B32_SFLOAT,offsetof(CylinderVertex,pos)},
-											{0u,EF_R8G8B8A8_UNORM,offsetof(CylinderVertex,color)},
-											{0u,EF_R32G32_SFLOAT,offsetof(CylinderVertex,uv)},
-											{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(CylinderVertex,normal)}
-										},{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} };
-
-    const size_t vtxCnt = 2u*tesselation;
-    auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt*sizeof(CylinderVertex) });
-
-    CylinderVertex* vertices = reinterpret_cast<CylinderVertex*>(vtxBuf->getPointer());
-	for (auto i=0ull; i<vtxCnt; i++)
-		vertices[i] = CylinderVertex();
-
-    const uint32_t halfIx = tesselation;
-
-    uint8_t glcolor[4];
-    color.toOpenGLColor(glcolor);
-
-    const float tesselationRec = core::reciprocal_approxim<float>(tesselation);
-    const float step = 2.f*core::PI<float>()*tesselationRec;
-    for (uint32_t i = 0u; i<tesselation; ++i)
-    {
-        core::vectorSIMDf p(std::cos(i*step), std::sin(i*step), 0.f);
-        p *= radius;
-        const auto n = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::normalize(p));
-
-        memcpy(vertices[i].pos, p.pointer, 12u);
-        vertices[i].normal = n;
-        memcpy(vertices[i].color, glcolor, 4u);
-        vertices[i].uv[0] = float(i) * tesselationRec;
-
-        vertices[i+halfIx] = vertices[i];
-        vertices[i+halfIx].pos[2] = length;
-        vertices[i+halfIx].uv[1] = 1.f;
-    }
-
-    constexpr uint32_t rows = 2u;
-	retval.indexCount = rows * 3u * tesselation;
-    auto idxBuf = asset::ICPUBuffer::create({ retval.indexCount *sizeof(uint16_t) });
-    uint16_t* indices = (uint16_t*)idxBuf->getPointer();
-
-    for (uint32_t i = 0u, j = 0u; i < halfIx; ++i)
-    {
-        indices[j++] = i;
-        indices[j++] = (i+1u)!=halfIx ? (i+1u):0u;
-        indices[j++] = i+halfIx;
-        indices[j++] = i+halfIx;
-        indices[j++] = (i+1u)!=halfIx ? (i+1u):0u;
-        indices[j++] = (i+1u)!=halfIx ? (i+1u+halfIx):halfIx;
-    }
-
-	// set vertex buffer
-	idxBuf->setUsageFlags(idxBuf->getUsageFlags() | asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	retval.indexBuffer = { 0ull, std::move(idxBuf) };
-	vtxBuf->setUsageFlags(vtxBuf->getUsageFlags() | asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	retval.bindings[0] = { 0ull, std::move(vtxBuf) };
-	retval.indexType = asset::EIT_16BIT;
-	//retval.bbox = ?;
+	using namespace hlsl;
+
+	CQuantNormalCache* const quantNormalCache = quantNormalCacheOverride == nullptr ? m_params.normalCache.get() : quantNormalCacheOverride;
+
+	const auto halfIx = tesselation; // index offset from a bottom-ring vertex to its top-ring twin
+	const uint32_t u32_vertexCount = 2 * tesselation; // kept in 32 bits so the range check below is meaningful
+	if (tesselation == 0 || u32_vertexCount > std::numeric_limits<uint16_t>::max()) // nothing to build, or too many vertices for 16-bit indices
+		return nullptr;
+	const auto vertexCount = static_cast<uint16_t>(u32_vertexCount);
 
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
+
+	// Create indices
+	using index_t = uint16_t;
+	{
+		constexpr uint32_t RowCount = 2u;
+		const auto IndexCount = RowCount * 3 * tesselation;
+		auto indexView = createIndexView<index_t>(IndexCount, vertexCount - 1);
+		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
+		// Two triangles per side quad. `j` counts up to 6*tesselation writes, which can exceed 65535, so it must be 32-bit.
+		for (uint32_t i = 0u, j = 0u; i < halfIx; ++i)
+		{
+			u[j++] = i;
+			u[j++] = (i + 1u) != halfIx ? (i + 1u):0u; // wrap around to the first vertex of the ring
+			u[j++] = i + halfIx;
+			u[j++] = i + halfIx;
+			u[j++] = (i + 1u)!= halfIx ? (i + 1u):0u;
+			u[j++] = (i + 1u)!= halfIx ? (i + 1u + halfIx) : halfIx;
+		}
+
+		retval->setIndexView(std::move(indexView));
+	}
+
+	constexpr auto NormalCacheFormat = EF_R8G8B8_SNORM;
+
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
+
+	snorm_normal_t* normals;
+
+	using uv_element_t = uint16_t;
+	constexpr auto UnityUV = std::numeric_limits<uv_element_t>::max();
+	hlsl::vector<uv_element_t, 2>* uvs;
+	{
+		{
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, length, 0.0f);
+			aabb.minVx = float32_t4(-radius, -radius, 0.0f, 0.0f);
+			auto positionView = createPositionView(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
+		}
+		{
+			shapes::AABB<4, int8_t> aabb;
+			aabb.maxVx = hlsl::vector<int8_t,4>(127,127,127,0);
+			aabb.minVx = -aabb.maxVx;
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
+		}
+		{
+			auto uvView = createUvView<uv_element_t>(vertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
+		}
+	}
+	// Fill both rings in one pass: vertex i is on the z = 0 ring, vertex i + halfIx is its twin at z = length.
+	const float tesselationRec = 1.f / static_cast<float>(tesselation);
+	const float step = 2.f * numbers::pi<float32_t> * tesselationRec;
+	for (uint32_t i = 0u; i < tesselation; ++i)
+	{
+		const auto f_i = static_cast<float>(i);
+		hlsl::float32_t3 p(std::cos(f_i * step), std::sin(f_i * step), 0.f);
+		const auto n = quantNormalCache->quantize<NormalCacheFormat>(p); // quantize the unit direction before it is scaled
+		p *= radius;
+
+		positions[i] = { p.x, p.y, p.z };
+		memcpy(normals + i, &n, sizeof(n));
+		encodeUv(uvs + i, float32_t2(f_i * tesselationRec, 0.f));
+
+		positions[i + halfIx] = { p.x, p.y, length };
+		normals[i + halfIx] = normals[i];
+		uvs[i + halfIx] = { uvs[i].x, UnityUV }; // same first component as the twin just encoded; second component at full scale
+	}
+
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
-/* A cone with proper normals and texture coords */
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createCone(
-	float radius, float length, uint32_t tesselation,
-	const video::SColor& colorTop,
-	const video::SColor& colorBottom,
-	float oblique,
-	IMeshManipulator* const meshManipulatorOverride
-) const
+	float radius, float length, uint16_t tesselation,
+	float oblique, CQuantNormalCache* const quantNormalCacheOverride) const
 {
-    const size_t vtxCnt = tesselation * 2;
-    auto vtxBuf = asset::ICPUBuffer::create({ vtxCnt * sizeof(ConeVertex) });
-    ConeVertex* vertices = reinterpret_cast<ConeVertex*>(vtxBuf->getPointer());
-
-	ConeVertex* baseVertices = vertices;
-	ConeVertex* apexVertices = vertices + tesselation;
 
-    std::fill(vertices,vertices+vtxCnt, ConeVertex(core::vectorSIMDf(0.f),{},colorBottom));
-	CQuantNormalCache* const quantNormalCache = (meshManipulatorOverride == nullptr) ? defaultMeshManipulator->getQuantNormalCache() : meshManipulatorOverride->getQuantNormalCache();
+	using namespace hlsl;
 
-    const float step = (2.f*core::PI<float>()) / tesselation;
+	const uint32_t u32_vertexCount = tesselation + 1;
+	if (u32_vertexCount > std::numeric_limits<uint16_t>::max())
+		return nullptr;
+	const auto vertexCount = static_cast<uint16_t>(u32_vertexCount);
 
-	const core::vectorSIMDf apexVertexCoords(oblique, length, 0.0f);
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
 
-	//vertex positions
-	for (uint32_t i = 0u; i < tesselation; i++)
+	// Create indices
+	using index_t = uint16_t;
 	{
-		core::vectorSIMDf v(std::cos(i * step), 0.0f, std::sin(i * step), 0.0f);
-		v *= radius;
+		const auto IndexCount = 3 * tesselation;
+
+		auto indexView = createIndexView<index_t>(IndexCount, vertexCount - 1);
+		auto u = reinterpret_cast<index_t*>(indexView.src.buffer->getPointer());
+
+		const uint32_t apexVertexIndex = tesselation;
+
+		for (uint32_t i = 0; i < tesselation; i++)
+		{
+			u[i * 3] = apexVertexIndex;
+			u[(i * 3) + 1] = i;
+			u[(i * 3) + 2] = i == (tesselation - 1) ? 0 : i + 1;
+		}
 
-		memcpy(baseVertices[i].pos, v.pointer, sizeof(float) * 3);
-		memcpy(apexVertices[i].pos, apexVertexCoords.pointer, sizeof(float) * 3);
+		retval->setIndexView(std::move(indexView));
 	}
 
-	//vertex normals
-	for (uint32_t i = 0; i < tesselation; i++)
+	// Create vertex attributes with NONE usage because we have no clue how they'll be used
+	hlsl::float32_t3* positions;
 	{
-		const core::vectorSIMDf v0ToApex = apexVertexCoords - core::vectorSIMDf(vertices[i].pos[0], vertices[i].pos[1], vertices[i].pos[2]);
-
-		uint32_t nextVertexIndex = i == (tesselation - 1) ? 0 : i + 1;
-		core::vectorSIMDf u1 = core::vectorSIMDf(baseVertices[nextVertexIndex].pos[0], baseVertices[nextVertexIndex].pos[1], baseVertices[nextVertexIndex].pos[2]);
-		u1 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]);
-		float angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u1)).x);
-		u1 = core::normalize(core::cross(v0ToApex, u1)) * angleWeight;
-
-		uint32_t prevVertexIndex = i == 0 ? (tesselation - 1) : i - 1;
-		core::vectorSIMDf u2 = core::vectorSIMDf(baseVertices[prevVertexIndex].pos[0], baseVertices[prevVertexIndex].pos[1], baseVertices[prevVertexIndex].pos[2]);
-		u2 -= core::vectorSIMDf(baseVertices[i].pos[0], baseVertices[i].pos[1], baseVertices[i].pos[2]);
-		angleWeight = std::acos(core::dot(core::normalize(apexVertexCoords), core::normalize(u2)).x);
-		u2 = core::normalize(core::cross(u2, v0ToApex)) * angleWeight;
-
-		baseVertices[i].normal = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::normalize(u1 + u2));
-		apexVertices[i].normal = quantNormalCache->quantize<EF_A2B10G10R10_SNORM_PACK32>(core::normalize(u1));
+		{
+			shapes::AABB<4, float32_t> aabb; // base ring lies in the XZ plane at y = 0, apex is at (oblique, length, 0); assumes radius >= 0 and length >= 0
+			aabb.maxVx = float32_t4(oblique > radius ? oblique : radius, length, radius, 0.0f);
+			aabb.minVx = float32_t4(oblique < -radius ? oblique : -radius, 0.0f, -radius, 0.0f);
+			auto positionView = createPositionView(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
+		}
 	}
 
-	auto idxBuf = asset::ICPUBuffer::create({ 3u * tesselation * sizeof(uint16_t) });
-	uint16_t* indices = (uint16_t*)idxBuf->getPointer();
+	const float step = (2.f*core::PI<float>()) / tesselation;
+
+	const hlsl::float32_t3 apexVertexCoords(oblique, length, 0.0f);
 
-	const uint32_t firstIndexOfBaseVertices = 0;
-	const uint32_t firstIndexOfApexVertices = tesselation;
-	for (uint32_t i = 0; i < tesselation; i++)
+	const auto apexVertexBase_i = tesselation;
+
+	for (uint32_t i = 0u; i < tesselation; i++)
 	{
-		indices[i * 3] = firstIndexOfApexVertices + i;
-		indices[(i * 3) + 1] = firstIndexOfBaseVertices + i;
-		indices[(i * 3) + 2] = i == (tesselation - 1) ? firstIndexOfBaseVertices : firstIndexOfBaseVertices + i + 1;
+		hlsl::float32_t3 v(std::cos(i * step), 0.0f, std::sin(i * step));
+		v *= radius;
+		positions[i] = v;
 	}
+	positions[apexVertexBase_i] = apexVertexCoords;
 
-	return_type cone;
-
-	constexpr size_t vertexSize = sizeof(ConeVertex);
-	cone.inputParams =
-	{ 0b111u,0b1u,
-		{
-			{0u,EF_R32G32B32_SFLOAT,offsetof(ConeVertex,pos)},
-			{0u,EF_R8G8B8A8_UNORM,offsetof(ConeVertex,color)},
-			{0u,EF_A2B10G10R10_SNORM_PACK32,offsetof(ConeVertex,normal)}
-		},
-		{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX}
-	};
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
+	return retval;
+}
 
-	vtxBuf->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	cone.bindings[0] = { 0, std::move(vtxBuf) };
-	idxBuf->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	cone.indexBuffer = { 0, std::move(idxBuf) };
-	cone.indexCount = cone.indexBuffer.buffer->getSize() / sizeof(uint16_t);
-	cone.indexType = EIT_16BIT;
+core::smart_refctd_ptr<ICPUGeometryCollection> CGeometryCreator::createArrow(
+	const uint16_t tesselationCylinder, // forwarded to createCylinder
+	const uint16_t tesselationCone, // forwarded to createCone
+	const float height, // total arrow height; must exceed cylinderHeight
+	const float cylinderHeight, // length of the shaft; the cone gets height - cylinderHeight
+	const float width0, // passed as the cylinder radius
+	const float width1 // passed as the cone radius
+) const
 {
+	assert(height > cylinderHeight);
+
+	auto cylinder = createCylinder(width0, cylinderHeight, tesselationCylinder);
+	auto cone = createCone(width1, height-cylinderHeight, tesselationCone);
+	if (!cylinder || !cone) return nullptr; // both creators return nullptr on failure; never store a null geometry
+	auto collection = core::make_smart_refctd_ptr<ICPUGeometryCollection>();
+	auto* geometries = collection->getGeometries();
+	geometries->push_back({
+		.geometry = cylinder
+	});
+	const auto coneTransform = hlsl::math::linalg::rotation_mat(hlsl::numbers::pi<hlsl::float32_t> * -0.5f, hlsl::float32_t3(1.f, 0.f, 0.f)); // -pi/2 about the X axis
+	geometries->push_back({
+		.transform = hlsl::float32_t3x4(coneTransform),
+		.geometry = cone
+	});
+	return collection;
 
-    return cone;
 }
-#endif
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createRectangle(const hlsl::float32_t2 size) const
 {
@@ -673,94 +706,56 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createRectangle(co
 		3---2
 		*/
 		const index_t indices[] = {0,3,1,1,3,2};
-		auto buffer = ICPUBuffer::create({
-			{sizeof(indices),IBuffer::EUF_INDEX_BUFFER_BIT},
-			const_cast<void*>((const void*)indices) // TODO: temporary till two different creation params (adopting needs non const void, copying needs const void only
-		});
-		shapes::AABB<4,index_t> aabb;
-		aabb.minVx[0] = 0;
-		aabb.maxVx[0] = 3;
-		retval->setIndexView({
-			.composed = {
-				.encodedDataRange = {.u16=aabb},
-				.stride = sizeof(index_t),
-				.format = EF_R16_UINT,
-				.rangeFormat = IGeometryBase::EAABBFormat::U16
-			},
-			.src = {.offset=0,.size=buffer->getSize(),.buffer=std::move(buffer)}
-		});
+		auto indexView = createIndexView<index_t>(std::size(indices), 3);
+		memcpy(indexView.src.buffer->getPointer(), indices, sizeof(indices));
+		retval->setIndexView(std::move(indexView));
 	}
 
+	constexpr auto VertexCount = 4;
 	// Create vertices
 	{
 		{
-			const hlsl::float32_t2 positions[] = {
+			const hlsl::float32_t2 positions[VertexCount] = {
 				hlsl::float32_t2(-size.x, size.y),
 				hlsl::float32_t2( size.x, size.y),
 				hlsl::float32_t2( size.x,-size.y),
 				hlsl::float32_t2(-size.x,-size.y)
 			};
-			auto buff = ICPUBuffer::create({{sizeof(positions),IBuffer::EUF_NONE},(void*)positions});
 			shapes::AABB<4,float32_t> aabb;
 			aabb.minVx = float32_t4(-size,0.f,0.f);
 			aabb.maxVx = float32_t4( size,0.f,0.f);
-			retval->visitAABB([aabb](auto& ref)->void
-				{
-					ref.minVx = hlsl::trunc(aabb.minVx);
-					ref.maxVx = hlsl::trunc(aabb.maxVx);
-				}
-			);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32=aabb},
-					.stride = sizeof(positions[0]),
-					.format = EF_R32G32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)}
-			});
+			auto positionView = createPositionView<2>(VertexCount, aabb);
+			memcpy(positionView.src.buffer->getPointer(), positions, sizeof(positions));
+			retval->setPositionView(std::move(positionView));
 		}
 		{
-			const hlsl::vector<int8_t,4> normals[] = {
-				hlsl::vector<int8_t,4>(0,0,127,0),
-				hlsl::vector<int8_t,4>(0,0,127,0),
-				hlsl::vector<int8_t,4>(0,0,127,0),
-				hlsl::vector<int8_t,4>(0,0,127,0)
+			const hlsl::vector<int8_t,4> normals[VertexCount] = {
+				snorm_positive_z,
+				snorm_positive_z,
+				snorm_positive_z,
+				snorm_positive_z,
 			};
-			auto buff = ICPUBuffer::create({{sizeof(normals),IBuffer::EUF_NONE},(void*)normals});
 			shapes::AABB<4,int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(0,0,127,0);
-			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = sizeof(normals[0]),
-					.format = EF_R8G8B8A8_SNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			aabb.maxVx = snorm_positive_z;
+			aabb.minVx = snorm_normal_t(0, 0, 0, 0);
+			auto normalView = createSnormNormalView(VertexCount, aabb);
+			memcpy(normalView.src.buffer->getPointer(), normals, sizeof(normals));
+			retval->setNormalView(std::move(normalView));
 		}
 		{
-			const hlsl::vector<uint8_t,2> uvs[] = {
-				hlsl::vector<uint8_t,2>(  0,255),
-				hlsl::vector<uint8_t,2>(255,255),
-				hlsl::vector<uint8_t,2>(255,  0),
-				hlsl::vector<uint8_t,2>(  0,  0)
+			using uv_element_t = uint8_t;
+			constexpr auto MaxUvVal = std::numeric_limits<uv_element_t>::max();
+			const hlsl::vector<uv_element_t, 2> uvsData[VertexCount] = {
+				hlsl::vector<uv_element_t,2>(  0, MaxUvVal),
+				hlsl::vector<uv_element_t,2>(MaxUvVal, MaxUvVal),
+				hlsl::vector<uv_element_t,2>(MaxUvVal,  0),
+				hlsl::vector<uv_element_t,2>(  0,  0)
 			};
-			auto buff = ICPUBuffer::create({{sizeof(uvs),IBuffer::EUF_NONE},(void*)uvs});
-			shapes::AABB<4,uint8_t> aabb;
-			aabb.minVx = hlsl::vector<uint8_t,4>(0,0,0,0);
-			aabb.maxVx = hlsl::vector<uint8_t,4>(255,255,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u8=aabb},
-					.stride = sizeof(uvs[0]),
-					.format = EF_R8G8_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			// Same copy-into-view pattern as the position and normal views above:
+			// the data is copied wholesale, so no typed pointer into the buffer is needed.
+			auto uvView = createUvView<uv_element_t>(VertexCount);
+			memcpy(uvView.src.buffer->getPointer(), uvsData, sizeof(uvsData));
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -783,68 +778,36 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 	const size_t vertexCount = 2u + tesselation;
 
 	float32_t2* positions;
+
 	// for now because no reliable RGB10A2 encode and scant support for 24-bit UTB formats
-	hlsl::vector<int8_t,4>* normals;
+	snorm_normal_t* normals;
 	//
-	constexpr uint16_t UnityUV = 0xffffu;
-	uint16_t2* uvs;
+	using uv_element_t = uint16_t; // storage type of one UV component
+	constexpr uv_element_t UnityUV = std::numeric_limits<uv_element_t>::max(); // largest encodable UV component
+	hlsl::vector<uv_element_t, 2>* uvs;
 	{
 		{
-			constexpr auto AttrSize = sizeof(decltype(*positions));
-			auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE});
-			positions = reinterpret_cast<decltype(positions)>(buff->getPointer());
 			shapes::AABB<4,float32_t> aabb;
-			aabb.maxVx = float32_t4(radius,radius,0.f,0.f);
+			aabb.maxVx = float32_t4(radius,radius, 0.f, 0.f);
 			aabb.minVx = -aabb.maxVx;
-			retval->visitAABB([aabb](auto& ref)->void
-				{
-					ref.minVx = hlsl::trunc(aabb.minVx);
-					ref.maxVx = hlsl::trunc(aabb.maxVx);
-				}
-			);
-			retval->setPositionView({
-				.composed = {
-					.encodedDataRange = {.f32=aabb},
-					.stride = AttrSize,
-					.format = EF_R32G32_SFLOAT,
-					.rangeFormat = IGeometryBase::EAABBFormat::F32
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer = std::move(buff)}
-			});
+			auto positionView = createPositionView<2>(vertexCount, aabb);
+			positions = reinterpret_cast<decltype(positions)>(positionView.src.buffer->getPointer());
+			retval->setPositionView(std::move(positionView));
 		}
 		{
 			constexpr auto AttrSize = sizeof(decltype(*normals));
 			auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE});
-			normals = reinterpret_cast<decltype(normals)>(buff->getPointer());
 			shapes::AABB<4,int8_t> aabb;
-			aabb.maxVx = hlsl::vector<int8_t,4>(0,0,127,0);
+			aabb.maxVx = snorm_positive_z;
 			aabb.minVx = -aabb.maxVx;
-			retval->setNormalView({
-				.composed = {
-					.encodedDataRange = {.s8=aabb},
-					.stride = AttrSize,
-					.format = EF_R8G8B8A8_SNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::S8_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto normalView = createSnormNormalView(vertexCount, aabb);
+			normals = reinterpret_cast<decltype(normals)>(normalView.src.buffer->getPointer());
+			retval->setNormalView(std::move(normalView));
 		}
 		{
-			constexpr auto AttrSize = sizeof(decltype(*uvs));
-			auto buff = ICPUBuffer::create({AttrSize*vertexCount,IBuffer::EUF_NONE});
-			uvs = reinterpret_cast<decltype(uvs)>(buff->getPointer());
-			shapes::AABB<4,uint16_t> aabb;
-			aabb.minVx = uint16_t4(0,0,0,0);
-			aabb.maxVx = uint16_t4(UnityUV,UnityUV,0,0);
-			retval->getAuxAttributeViews()->push_back({
-				.composed = {
-					.encodedDataRange = {.u16=aabb},
-					.stride = AttrSize,
-					.format = EF_R16G16_UNORM,
-					.rangeFormat = IGeometryBase::EAABBFormat::U16_NORM
-				},
-				.src = {.offset=0,.size=buff->getSize(),.buffer=std::move(buff)}
-			});
+			auto uvView = createUvView<uv_element_t>(vertexCount);
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
 		}
 	}
 
@@ -865,26 +828,27 @@ core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createDisk(const f
 			*(uvs++) = uint16_t2(t*UnityUV+0.5f,0);
 		}
 	}
-	std::fill_n(normals,vertexCount,hlsl::vector<int8_t,4>(0,0,127,0));
+	std::fill_n(normals,vertexCount, snorm_positive_z);
 
 	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
 	return retval;
 }
 
-#if 0
 /*
 	Helpful Icosphere class implementation used to compute
 	and create icopshere's vertices and indecies.
 
 	Polyhedron subdividing icosahedron (20 tris) by N-times iteration
-    The icosphere with N=1 (default) has 80 triangles by subdividing a triangle
-    of icosahedron into 4 triangles. If N=0, it is identical to icosahedron.
+		The icosphere with N=1 (default) has 80 triangles, obtained by subdividing each
+		triangle of the icosahedron into 4 triangles. If N=0, it is identical to the icosahedron.
 */
 
 class Icosphere
 {
 public:
-	Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth), interleavedStride(32)
+	using index_t = uint32_t;
+
+	Icosphere(float radius = 1.0f, int subdivision = 1, bool smooth = false) : radius(radius), subdivision(subdivision), smooth(smooth)
 	{
 		if (smooth)
 			buildVerticesSmooth();
@@ -895,30 +859,22 @@ class Icosphere
 	~Icosphere() {}
 
 	unsigned int getVertexCount() const { return (unsigned int)vertices.size() / 3; }
-	unsigned int getNormalCount() const { return (unsigned int)normals.size() / 3; }
-	unsigned int getTexCoordCount() const { return (unsigned int)texCoords.size() / 2; }
 	unsigned int getIndexCount() const { return (unsigned int)indices.size(); }
 	unsigned int getLineIndexCount() const { return (unsigned int)lineIndices.size(); }
 	unsigned int getTriangleCount() const { return getIndexCount() / 3; }
 
-	unsigned int getVertexSize() const { return (unsigned int)vertices.size() * sizeof(float); }   // # of bytes
+	unsigned int getPositionSize() const { return (unsigned int)vertices.size() * sizeof(float); }   // # of bytes
 	unsigned int getNormalSize() const { return (unsigned int)normals.size() * sizeof(float); }
 	unsigned int getTexCoordSize() const { return (unsigned int)texCoords.size() * sizeof(float); }
-	unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(unsigned int); }
+	unsigned int getIndexSize() const { return (unsigned int)indices.size() * sizeof(index_t); }
 	unsigned int getLineIndexSize() const { return (unsigned int)lineIndices.size() * sizeof(unsigned int); }
 
-	const float* getVertices() const { return vertices.data(); }
+	const float* getPositions() const { return vertices.data(); }
 	const float* getNormals() const { return normals.data(); }
 	const float* getTexCoords() const { return texCoords.data(); }
 	const unsigned int* getIndices() const { return indices.data(); }
 	const unsigned int* getLineIndices() const { return lineIndices.data(); }
 
-	// for interleaved vertices: V/N/T
-	unsigned int getInterleavedVertexCount() const { return getVertexCount(); }    // # of vertices
-	unsigned int getInterleavedVertexSize() const { return (unsigned int)interleavedVertices.size() * sizeof(float); }    // # of bytes
-	int getInterleavedStride() const { return interleavedStride; }   // should be 32 bytes
-	const float* getInterleavedVertices() const { return interleavedVertices.data(); }
-
 protected:
 
 private:
@@ -1015,14 +971,14 @@ class Icosphere
 		texture coordinate is shared or no. If it is on the line segments, it is also
 		non-shared point
 
-		   00  01  02  03  04         
-		   /\  /\  /\  /\  /\         
-		  /  \/  \/  \/  \/  \        
+			 00  01  02  03  04         
+			 /\  /\  /\  /\  /\         
+			/  \/  \/  \/  \/  \        
 		 05  06  07  08  09   \       
-		   \   10  11  12  13  14     
+			 \   10  11  12  13  14     
 			\  /\  /\  /\  /\  /      
 			 \/  \/  \/  \/  \/       
-			  15  16  17  18  19      
+				15  16  17  18  19      
 	*/
 
 	static inline bool isSharedTexCoord(const float t[2])
@@ -1096,11 +1052,6 @@ class Icosphere
 			vertices[i] *= scale;
 			vertices[i + 1] *= scale;
 			vertices[i + 2] *= scale;
-
-			// for interleaved array
-			interleavedVertices[j] *= scale;
-			interleavedVertices[j + 1] *= scale;
-			interleavedVertices[j + 2] *= scale;
 		}
 	}
 
@@ -1264,9 +1215,6 @@ class Icosphere
 
 		// subdivide icosahedron
 		subdivideVerticesFlat();
-
-		// generate interleaved vertex array as well
-		buildInterleavedVertices();
 	}
 
 	/*
@@ -1489,8 +1437,6 @@ class Icosphere
 		// subdivide icosahedron
 		subdivideVerticesSmooth();
 
-		// generate interleaved vertex array as well
-		buildInterleavedVertices();
 	}
 	/*
 		divide a trinage into 4 sub triangles and repeat N times
@@ -1588,7 +1534,7 @@ class Icosphere
 				 v1           
 				/ \           
 		 newV1 *---* newV3    
-			  / \ / \         
+				/ \ / \         
 			v2---*---v3       
 				newV2         
 	*/
@@ -1666,27 +1612,6 @@ class Icosphere
 		stride must be 32 bytes
 	*/
 
-	void buildInterleavedVertices()
-	{
-		core::vector<float>().swap(interleavedVertices);
-
-		std::size_t i, j;
-		std::size_t count = vertices.size();
-		for (i = 0, j = 0; i < count; i += 3, j += 2)
-		{
-			interleavedVertices.push_back(vertices[i]);
-			interleavedVertices.push_back(vertices[i + 1]);
-			interleavedVertices.push_back(vertices[i + 2]);
-
-			interleavedVertices.push_back(normals[i]);
-			interleavedVertices.push_back(normals[i + 1]);
-			interleavedVertices.push_back(normals[i + 2]);
-
-			interleavedVertices.push_back(texCoords[j]);
-			interleavedVertices.push_back(texCoords[j + 1]);
-		}
-	}
-
 	void addVertex(float x, float y, float z)
 	{
 		vertices.push_back(x);
@@ -1754,8 +1679,8 @@ class Icosphere
 		add 7 sub edge lines per triangle to array using 6 indices (CCW)           
 			 i1                                                                     
 			 /            : (i1, i2)                                                
-		   i2---i6        : (i2, i6)												  
-		   / \  /         : (i2, i3), (i2, i4), (i6, i4)							  
+			 i2---i6        : (i2, i6)												  
+			 / \  /         : (i2, i3), (i2, i4), (i6, i4)							  
 		 i3---i4---i5     : (i3, i4), (i4, i5)									  
 	*/
 
@@ -1830,46 +1755,73 @@ class Icosphere
 	core::vector<uint32_t> lineIndices;
 	std::map<std::pair<float, float>, uint32_t> sharedIndices;   // indices of shared vertices, key is tex coord (s,t)
 
-	// interleaved
-	core::vector<float> interleavedVertices;
-	uint32_t interleavedStride;											// # of bytes to hop to the next vertex (should be 32 bytes)
 
 };
 
 core::smart_refctd_ptr<ICPUPolygonGeometry> CGeometryCreator::createIcoSphere(float radius, uint32_t subdivision, bool smooth) const
 {
-	Icosphere IcosphereData(radius, subdivision, smooth);
-	
-	return_type icosphereGeometry;
 
-	constexpr size_t vertexSize = sizeof(IcosphereVertex);
+	Icosphere icosphere(radius, subdivision, smooth);
 
-	icosphereGeometry.inputParams =
-	{ 0b111u,0b1u,
-		{
-			{0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,pos)},
-			{0u, EF_R32G32B32_SFLOAT, offsetof(IcosphereVertex,normals)},
-			{0u, EF_R32G32_SFLOAT, offsetof(IcosphereVertex,uv)}
-		},
-		{vertexSize,SVertexInputBindingParams::EVIR_PER_VERTEX} 
-	};
+	auto retval = core::make_smart_refctd_ptr<ICPUPolygonGeometry>();
+	retval->setIndexing(IPolygonGeometryBase::TriangleList());
+
+	using namespace hlsl;
+
+	// Create indices
+	{
+		auto indexView = createIndexView<Icosphere::index_t>(icosphere.getIndexCount(), icosphere.getVertexCount() - 1);
+		memcpy(indexView.src.buffer->getPointer(), icosphere.getIndices(), icosphere.getIndexSize());
+		retval->setIndexView(std::move(indexView));
+	}
 
-	auto vertexBuffer = asset::ICPUBuffer::create({ IcosphereData.getInterleavedVertexSize() });
-	auto indexBuffer = asset::ICPUBuffer::create({ IcosphereData.getIndexSize() });
+	{
+		{
+			shapes::AABB<4, float32_t> aabb;
+			aabb.maxVx = float32_t4(radius, radius, radius, 0.f);
+			aabb.minVx = -aabb.maxVx;
+			auto positionView = createPositionView(icosphere.getVertexCount(), aabb);
+			memcpy(positionView.src.buffer->getPointer(), icosphere.getPositions(), icosphere.getPositionSize());
+			retval->setPositionView(std::move(positionView));
+		}
+		{
+			using normal_t = float32_t3;
+			constexpr auto AttrSize = sizeof(normal_t);
+			auto buff = ICPUBuffer::create({icosphere.getNormalSize(), IBuffer::EUF_NONE});
+			const auto normals = reinterpret_cast<normal_t*>(buff->getPointer());
+			memcpy(normals, icosphere.getNormals(), icosphere.getNormalSize());
+			shapes::AABB<4,float32_t> aabb;
+			aabb.maxVx = float32_t4(1, 1, 1, 0.f);
+			aabb.minVx = -aabb.maxVx;
+			retval->setNormalView({
+				.composed = {
+					.encodedDataRange = {.f32 = aabb},
+					.stride = AttrSize,
+					.format = EF_R32G32B32_SFLOAT,
+					.rangeFormat = IGeometryBase::EAABBFormat::F32
+				},
+				.src = {.offset = 0,.size = buff->getSize(),.buffer = std::move(buff)},
+			});
+		}
+		{
+			using uv_element_t = uint16_t;
+			hlsl::vector<uv_element_t, 2>* uvs;
+			auto uvView = createUvView<uv_element_t>(icosphere.getVertexCount());
+			uvs = reinterpret_cast<decltype(uvs)>(uvView.src.buffer->getPointer());
 
-	memcpy(vertexBuffer->getPointer(), IcosphereData.getInterleavedVertices(), vertexBuffer->getSize());
-	memcpy(indexBuffer->getPointer(), IcosphereData.getIndices(), indexBuffer->getSize());
+			// The flat (s,t) array and the vertex count do not change while encoding, so read them once
+			const auto texCoords = icosphere.getTexCoords();
+			const auto uvCount = icosphere.getVertexCount();
+			for (auto uv_i = 0u; uv_i < uvCount; uv_i++)
+				encodeUv(uvs + uv_i, float32_t2(texCoords[2 * uv_i], texCoords[(2 * uv_i) + 1]));
 
-	vertexBuffer->addUsageFlags(asset::IBuffer::EUF_VERTEX_BUFFER_BIT);
-	icosphereGeometry.bindings[0] = { 0, std::move(vertexBuffer) };
-	indexBuffer->addUsageFlags(asset::IBuffer::EUF_INDEX_BUFFER_BIT);
-	icosphereGeometry.indexBuffer = { 0, std::move(indexBuffer) };
-	icosphereGeometry.indexCount = IcosphereData.getIndexCount();
-	icosphereGeometry.indexType = EIT_32BIT;
+			retval->getAuxAttributeViews()->push_back(std::move(uvView));
+		}
+	}
 
-	return icosphereGeometry;
+	CPolygonGeometryManipulator::recomputeContentHashes(retval.get());
+	return retval;
 }
-#endif
 
 } // end namespace nbl::asset
 
diff --git a/src/nbl/builtin/CMakeLists.txt b/src/nbl/builtin/CMakeLists.txt
index 9e4f35fc57..3b9fe1c39a 100644
--- a/src/nbl/builtin/CMakeLists.txt
+++ b/src/nbl/builtin/CMakeLists.txt
@@ -290,6 +290,7 @@ LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format/shared_exp.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/format.hlsl")
 #linear algebra
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/fast_affine.hlsl")
+LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/linalg/transform.hlsl")
 # TODO: rename `equations` to `polynomials` probably
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/functions.hlsl")
 LIST_BUILTIN_RESOURCE(NBL_RESOURCES_TO_EMBED "hlsl/math/geometry.hlsl")
diff --git a/src/nbl/video/utilities/CAssetConverter.cpp b/src/nbl/video/utilities/CAssetConverter.cpp
index d004660e42..fed2d68cf0 100644
--- a/src/nbl/video/utilities/CAssetConverter.cpp
+++ b/src/nbl/video/utilities/CAssetConverter.cpp
@@ -3024,7 +3024,7 @@ auto CAssetConverter::reserve(const SInputs& inputs) -> SReserveResult
 									uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
 									if (geom.hasTransform())
 									{
-										size = core::alignUp(size,alignof(float))+sizeof(hlsl::float32_t3x4);
+										size = core::alignUp(size, IAccelerationStructure::TransformDataMinAlignment)+sizeof(hlsl::float32_t3x4);
 										alignment = hlsl::max<uint16_t>(alignof(float),alignment);
 									}
 									uint16_t indexSize = 0;
@@ -5061,7 +5061,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 									uint16_t alignment = hlsl::max(0x1u<<hlsl::findLSB(geom.vertexStride),32u);
 									if (geom.hasTransform())
 									{
-										size = core::alignUp(size,alignof(float))+sizeof(hlsl::float32_t3x4);
+										size = core::alignUp(size, IAccelerationStructure::TransformDataMinAlignment)+sizeof(hlsl::float32_t3x4);
 										alignment = hlsl::max<uint16_t>(alignof(float),alignment);
 									}
 									uint16_t indexSize = 0u;
@@ -5265,7 +5265,7 @@ ISemaphore::future_t<IQueue::RESULT> CAssetConverter::convert_impl(SReserveResul
 									}
 									if (geom.hasTransform())
 									{
-										offset = core::alignUp(offset,alignof(float));
+										offset = core::alignUp(offset, IAccelerationStructure::TransformDataMinAlignment);
 										outGeom.transform = {.offset=offset,.buffer=smart_refctd_ptr<const IGPUBuffer>(scratchBuffer)};
 										memcpyCallback.data = &geom.transform;
 										if (!streamDataToScratch(offset,sizeof(geom.transform),memcpyCallback))