From 652009e5c4aebc5e0c0ba15295f7564f9bcc4669 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sun, 16 Feb 2025 13:11:35 +0100 Subject: [PATCH 01/18] passing names works --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 8 +++++- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 28 +++++++++++++++++++ vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 2 ++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index a7fe07a..03c96b6 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -911,7 +911,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf return VKFFT_ERROR_INVALID_QUEUE; } app->configuration.queue = inputLaunchConfiguration.queue; - + const char dummy_kernel[50] = "kernel void VkFFT_dummy (){}"; const char function_name[20] = "VkFFT_dummy"; @@ -966,6 +966,12 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf compileOptions->release(); #endif +//DvdB + app->configuration.dirkKernelCounter = 0; + if (inputLaunchConfiguration.dirkName != nullptr){ + app->configuration.dirkName = inputLaunchConfiguration.dirkName; + } + resFFT = initializeBluesteinAutoPadding(app); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 19c3920..3057816 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -22,6 +22,9 @@ #ifndef VKFFT_COMPILEKERNEL_H #define VKFFT_COMPILEKERNEL_H #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" +#include //DvdB +#include //DvdV + static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* axis) { #if(VKFFT_BACKEND==0) @@ -177,6 +180,31 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* GLSLANG_MSG_DEFAULT_BIT, (const glslang_resource_t*)&default_resource, }; + + + //DvdB + + app->configuration.dirkKernelCounter++; + const char* dname = app->configuration.dirkName; + //const char* teststr = "HELLOOOOOOO"; + ofstream myfile; + std::string fname = "kernel_" ; + fname += dname ; + //fname += ".txt"; + fname += "_"+ std::to_string(app->configuration.dirkKernelCounter) +".txt"; + myfile.open(fname); + //app->configuration.dirkName = "DIRKDIRK"; + //myfile << app->configuration.dirkName; + myfile << code0; + //myfile << teststr; + myfile.close(); + + + + + + + //printf("%s\n", code0); glslang_shader_t* shader = glslang_shader_create((const glslang_input_t*)&input); const char* err; diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index d30318f..34fa6b2 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -321,6 +321,8 @@ typedef struct { MTL::CommandBuffer* commandBuffer;//Filled at app execution MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution #endif + pfUINT dirkKernelCounter; + const char* dirkName; } VkFFTConfiguration;//parameters specified at plan creation typedef struct { From 83b9c9a912851f8c4728d79c4b427de570bd1006 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sun, 16 Feb 2025 13:18:37 +0100 Subject: [PATCH 02/18] Update vkFFT_CompileKernel.h --- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 3057816..24b8f0c 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -184,21 +184,24 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* //DvdB - app->configuration.dirkKernelCounter++; - const char* dname = app->configuration.dirkName; + const char* dname = app->configuration.dirkName; //const char* teststr = "HELLOOOOOOO"; ofstream myfile; - std::string fname = "kernel_" ; - fname += dname ; + std::string fname = ""; + + fname += dname ; + fname += "_kernel_"; //fname += ".txt"; - fname += "_"+ std::to_string(app->configuration.dirkKernelCounter) +".txt"; - myfile.open(fname); + fname += std::to_string(app->configuration.dirkKernelCounter) +".txt"; + + myfile.open(fname); //app->configuration.dirkName = "DIRKDIRK"; //myfile << app->configuration.dirkName; myfile << code0; //myfile << teststr; myfile.close(); - + app->configuration.dirkKernelCounter++; + From c9209349371a2e82f3c98810c47a4b41f66597f7 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sun, 16 Feb 2025 23:34:57 +0100 Subject: [PATCH 03/18] add currentBatch --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 1 + .../vkFFT_KernelStartEnd.h | 7 ++++ .../vkFFT_InputOutputLayout.h | 22 ++++++++++-- .../vkFFT_PushConstants.h | 5 +++ .../vkFFT_API_handles/vkFFT_CompileKernel.h | 3 +- .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 36 +++++++++++++++++++ .../vkFFT_InitAPIParameters.h | 8 +++++ .../vkFFT_Plans/vkFFT_Plan_FFT.h | 9 +++++ .../vkFFT_Plans/vkFFT_Plan_R2C.h | 7 ++++ vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 8 +++++ 10 files changed, 102 insertions(+), 4 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 03c96b6..8b49b72 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -968,6 +968,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf //DvdB app->configuration.dirkKernelCounter = 0; + app->configuration.dirkDispatchCounter = 0; if (inputLaunchConfiguration.dirkName != nullptr){ app->configuration.dirkName = inputLaunchConfiguration.dirkName; } diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h index 8ba77ac..7dd36bd 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h @@ -46,6 +46,10 @@ static inline void appendKernelStart(VkFFTSpecializationConstantsLayout* sc, int appendSharedMemoryVkFFT(sc, (int)locType); sc->tempLen = sprintf(sc->tempStr, "void main() {\n"); PfAppendLine(sc); + + //DvdB + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y > consts.currentBatch) return;\n"); + PfAppendLine(sc); #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern __shared__ float shared[];\n"); PfAppendLine(sc); @@ -274,6 +278,9 @@ static inline void appendKernelStart_R2C(VkFFTSpecializationConstantsLayout* sc, sc->tempLen = sprintf(sc->tempStr, "void main() {\n"); PfAppendLine(sc); + //DvdB + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z > consts.currentBatch) return;\n"); + PfAppendLine(sc); #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern \"C\" __global__ void __launch_bounds__(%" PRIi64 ") VkFFT_main_R2C ", sc->localSize[0].data.i * sc->localSize[1].data.i * sc->localSize[2].data.i); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h index 0cc75d1..1fc0aad 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h @@ -41,10 +41,18 @@ static inline void appendInputLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc int typeSize = ((sc->inputMemoryCode % 10) == 3) ? sc->complexSize : sc->complexSize / 2; #if(VKFFT_BACKEND==0) if (sc->inputBufferBlockNum == 1) { - sc->tempLen = sprintf(sc->tempStr, "\ +/* sc->tempLen = sprintf(sc->tempStr, "\ layout(std430, binding = %d) buffer DataIn{\n\ %s inputs[%" PRIu64 "];\n\ };\n\n", id, inputMemoryType->name, sc->inputBufferBlockSize / typeSize); +*/ +//DvdB + sc->tempLen = sprintf(sc->tempStr, "\ +layout(std430, binding = %d) buffer DataIn{\n\ + %s inputs[];\n\ +};\n\n", id, inputMemoryType->name); + + PfAppendLine(sc); } else { @@ -68,11 +76,19 @@ static inline void appendOutputLayoutVkFFT(VkFFTSpecializationConstantsLayout* s int typeSize = ((sc->outputMemoryCode % 10) == 3) ? sc->complexSize : sc->complexSize / 2; #if(VKFFT_BACKEND==0) if (sc->inputBufferBlockNum == 1) { - sc->tempLen = sprintf(sc->tempStr, "\ +/* sc->tempLen = sprintf(sc->tempStr, "\ layout(std430, binding = %d) buffer DataOut{\n\ %s outputs[%" PRIu64 "];\n\ };\n\n", id, outputMemoryType->name, sc->outputBufferBlockSize / typeSize); - PfAppendLine(sc); +*/ +//DvdB + sc->tempLen = sprintf(sc->tempStr, "\ +layout(std430, binding = %d) buffer DataOut{\n\ + %s outputs[];\n\ +};\n\n", id, outputMemoryType->name); + + + PfAppendLine(sc); } else { sc->tempLen = sprintf(sc->tempStr, "\ diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h index 12aa811..612846f 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h @@ -90,6 +90,11 @@ static inline void appendPushConstants(VkFFTSpecializationConstantsLayout* sc) { sprintf(tempCopyStr, "consts.%s", sc->kernelOffset.name); sprintf(sc->kernelOffset.name, "%s", tempCopyStr); } + if (sc->performPostCompilationCurrentBatch) { + appendPushConstant(sc, &sc->currentBatch); + sprintf(tempCopyStr, "consts.%s", sc->currentBatch.name); + sprintf(sc->currentBatch.name, "%s", tempCopyStr); + } #if(VKFFT_BACKEND==0) sc->tempLen = sprintf(sc->tempStr, "} consts;\n\n"); PfAppendLine(sc); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 24b8f0c..2068647 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -23,7 +23,7 @@ #define VKFFT_COMPILEKERNEL_H #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" #include //DvdB -#include //DvdV +#include //DvdB static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* axis) { @@ -197,6 +197,7 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* myfile.open(fname); //app->configuration.dirkName = "DIRKDIRK"; //myfile << app->configuration.dirkName; + myfile << "// kernel type: " << app->configuration.dirkTypeFFT <specializationConstants.swapComputeWorkGroupID == 1) { pfUINT temp = dispatchBlock[0]; dispatchBlock[0] = dispatchBlock[1]; @@ -54,6 +58,28 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a if (app->configuration.specifyOffsetsAtLaunch) { axis->updatePushConstants = 1; } + + + + const char* dname = app->configuration.dirkName; + ofstream myfile; + std::string fname = ""; + + fname += dname ; + fname += "_dispatch_"; + fname += std::to_string(app->configuration.dirkDispatchCounter); + fname += ".txt"; + + myfile.open(fname); + myfile << "Block0: "<< dispatchBlock[0] <configuration.dirkDispatchCounter++; + + //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", dispatchBlock[0], dispatchBlock[1], dispatchBlock[2]); //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", blockNumber[0], blockNumber[1], blockNumber[2]); for (pfUINT i = 0; i < 3; i++) @@ -107,6 +133,11 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); offset += sizeof(pfUINT); } + if (axis->specializationConstants.performPostCompilationCurrentBatch) { + temp = axis->specializationConstants.currentBatch.data.i; + memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); + offset += sizeof(pfUINT); + } } else { pfUINT offset = 0; @@ -144,6 +175,11 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); offset += sizeof(uint32_t); } + if (axis->specializationConstants.performPostCompilationCurrentBatch) { + temp = (uint32_t)(axis->specializationConstants.currentBatch.data.i); + memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); + offset += sizeof(uint32_t); + } } } dispatchSize[0] = (i == blockNumber[0] - 1) ? lastBlockSize[0] : blockSize[0]; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h index da4fa29..cd61e1a 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h @@ -389,6 +389,11 @@ static inline VkFFTResult initParametersAPI(VkFFTApplication* app, VkFFTSpeciali PfAllocateContainerFlexible(sc, &sc->kernelOffset, 50); sprintf(sc->kernelOffset.name, "kernelOffset"); } + if (sc->performPostCompilationCurrentBatch) { + sc->currentBatch.type = 100 + sc->uintTypeCode; + PfAllocateContainerFlexible(sc, &sc->currentBatch, 50); + sprintf(sc->currentBatch.name, "currentBatch"); + } #if(VKFFT_BACKEND==0) sprintf(sc->inputsStruct.name, "inputs"); sprintf(sc->outputsStruct.name, "outputs"); @@ -630,6 +635,9 @@ static inline VkFFTResult freeParametersAPI(VkFFTApplication* app, VkFFTSpeciali if (sc->performPostCompilationKernelOffset) { PfDeallocateContainer(sc, &sc->kernelOffset); } + if (sc->performPostCompilationCurrentBatch) { + PfDeallocateContainer(sc, &sc->currentBatch); + } return res; } diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h index 47b4f7e..ba9805c 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h @@ -425,6 +425,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla if (app->configuration.specifyOffsetsAtLaunch) { axis->specializationConstants.performPostCompilationInputOffset = 1; axis->specializationConstants.performPostCompilationOutputOffset = 1; + axis->specializationConstants.performPostCompilationCurrentBatch = 1; + if (app->configuration.performConvolution) axis->specializationConstants.performPostCompilationKernelOffset = 1; } @@ -435,6 +437,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->specializationConstants.outputOffset.data.i = app->configuration.outputBufferOffset; axis->specializationConstants.kernelOffset.type = 31; axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; + axis->specializationConstants.currentBatch.type = 31; + axis->specializationConstants.currentBatch.data.i = app->configuration.numberBatches; } resFFT = VkFFTCheckUpdateBufferSet(app, axis, 1, 0); @@ -672,6 +676,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->pushConstants.performPostCompilationKernelOffset = 1; axis->pushConstants.structSize += 1; } + if (axis->specializationConstants.performPostCompilationCurrentBatch) { + axis->pushConstants.performPostCompilationCurrentBatch = 1; + axis->pushConstants.structSize += 1; + } if (app->configuration.useUint64) axis->pushConstants.structSize *= sizeof(pfUINT); else @@ -770,6 +778,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla deleteVkFFT(app); return resFFT; } + app->configuration.dirkTypeFFT = 100000 + 1000*axis_id + 100*axis_upload_id+10*inverse+reverseBluesteinMultiUpload; resFFT = VkFFT_CompileKernel(app, axis); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h index f5b5e8e..76b7bab 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h @@ -137,6 +137,8 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* if (app->configuration.specifyOffsetsAtLaunch) { axis->specializationConstants.performPostCompilationInputOffset = 1; axis->specializationConstants.performPostCompilationOutputOffset = 1; + axis->specializationConstants.performPostCompilationCurrentBatch = 1; + if (app->configuration.performConvolution) axis->specializationConstants.performPostCompilationKernelOffset = 1; } @@ -289,6 +291,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* axis->pushConstants.performPostCompilationKernelOffset = 1; axis->pushConstants.structSize += 1; } + if (axis->specializationConstants.performPostCompilationCurrentBatch) { + axis->pushConstants.performPostCompilationCurrentBatch = 1; + axis->pushConstants.structSize += 1; + } if (app->configuration.useUint64) axis->pushConstants.structSize *= sizeof(pfUINT); else @@ -358,6 +364,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* deleteVkFFT(app); return resFFT; } + app->configuration.dirkTypeFFT = 200000; resFFT = VkFFT_CompileKernel(app, axis); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index 34fa6b2..25e52c1 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -323,6 +323,9 @@ typedef struct { #endif pfUINT dirkKernelCounter; const char* dirkName; + pfUINT dirkTypeFFT; + pfUINT dirkCurAxis; + pfUINT dirkDispatchCounter; } VkFFTConfiguration;//parameters specified at plan creation typedef struct { @@ -786,6 +789,7 @@ typedef struct { PfContainer inputOffset; PfContainer kernelOffset; PfContainer outputOffset; + PfContainer currentBatch; int reorderFourStep; int storeSharedComplexComponentsSeparately; int pushConstantsStructSize; @@ -793,6 +797,7 @@ typedef struct { int performPostCompilationInputOffset; int performPostCompilationOutputOffset; int performPostCompilationKernelOffset; + int performPostCompilationCurrentBatch; pfUINT inputBufferBlockNum; pfUINT inputBufferBlockSize; pfUINT outputBufferBlockNum; @@ -1033,6 +1038,9 @@ typedef struct { pfUINT performPostCompilationKernelOffset; pfUINT kernelOffset; + pfUINT performPostCompilationCurrentBatch; + pfUINT currentBatch; + pfUINT structSize; } VkFFTPushConstantsLayout; From 22d4b69057ea2fd6cd3bae15aed0117daa2d1af7 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Mon, 17 Feb 2025 22:28:58 +0100 Subject: [PATCH 04/18] current batch implemented not tested with buffer --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 6 +++ .../vkFFT_KernelStartEnd.h | 12 ++++-- .../vkFFT_KernelsLevel0/vkFFT_KernelUtils.h | 6 +++ .../vkFFT_InputOutputLayout.h | 22 +++++++++++ .../vkFFT_PushConstants.h | 10 ++--- .../vkFFT_KernelsLevel2/vkFFT_FFT.h | 4 ++ .../vkFFT_R2C_even_decomposition.h | 4 ++ .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 20 +++++----- .../vkFFT_InitAPIParameters.h | 16 ++++---- .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 38 +++++++++++++++++++ .../vkFFT_Plans/vkFFT_Plan_FFT.h | 17 +++++---- .../vkFFT_Plans/vkFFT_Plan_R2C.h | 13 ++++--- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 15 ++++++-- 13 files changed, 140 insertions(+), 43 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 8b49b72..d7a66ae 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -967,6 +967,8 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf #endif //DvdB + if (inputLaunchConfiguration.dynamicBatch != 0) app->configuration.dynamicBatch = 1; + app->configuration.dirkKernelCounter = 0; app->configuration.dirkDispatchCounter = 0; if (inputLaunchConfiguration.dirkName != nullptr){ @@ -1173,6 +1175,10 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf } app->configuration.kernel = inputLaunchConfiguration.kernel; } + if (inputLaunchConfiguration.currentBatchUBOSize !=0){ + app->configuration.currentBatchUBOSize = inputLaunchConfiguration.currentBatchUBOSize; + app->configuration.currentBatchUBO = inputLaunchConfiguration.currentBatchUBO; + } if (inputLaunchConfiguration.bufferOffset != 0) app->configuration.bufferOffset = inputLaunchConfiguration.bufferOffset; if (inputLaunchConfiguration.tempBufferOffset != 0) app->configuration.tempBufferOffset = inputLaunchConfiguration.tempBufferOffset; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h index 7dd36bd..f16f95e 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h @@ -48,8 +48,10 @@ static inline void appendKernelStart(VkFFTSpecializationConstantsLayout* sc, int PfAppendLine(sc); //DvdB - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y > consts.currentBatch) return;\n"); - PfAppendLine(sc); + if (sc->dynamicBatch){ + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y > currentBatch.N) return;\n"); + PfAppendLine(sc); + } #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern __shared__ float shared[];\n"); PfAppendLine(sc); @@ -279,8 +281,10 @@ static inline void appendKernelStart_R2C(VkFFTSpecializationConstantsLayout* sc, PfAppendLine(sc); //DvdB - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z > consts.currentBatch) return;\n"); - PfAppendLine(sc); + if (sc->dynamicBatch){ + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z > currentBatch.N) return;\n"); + PfAppendLine(sc); + } #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern \"C\" __global__ void __launch_bounds__(%" PRIi64 ") VkFFT_main_R2C ", sc->localSize[0].data.i * sc->localSize[1].data.i * sc->localSize[2].data.i); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h index 49aaba2..c984b27 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h @@ -61,6 +61,8 @@ static inline void appendVersion(VkFFTSpecializationConstantsLayout* sc) { #endif return; } + + static inline void appendExtensions(VkFFTSpecializationConstantsLayout* sc) { if (sc->res != VKFFT_SUCCESS) return; #if(VKFFT_BACKEND==0) @@ -78,6 +80,10 @@ static inline void appendExtensions(VkFFTSpecializationConstantsLayout* sc) { sc->tempLen = sprintf(sc->tempStr, "#extension GL_EXT_shader_16bit_storage : require\n\n"); PfAppendLine(sc); } + if (sc->dynamicBatch) { + sc->tempLen = sprintf(sc->tempStr, "#extension GL_EXT_scalar_block_layout : enable\n\n"); + PfAppendLine(sc); + } #elif(VKFFT_BACKEND==1) if ((((sc->floatTypeInputMemoryCode / 10) % 10) == 0) || (((sc->floatTypeOutputMemoryCode / 10) % 10) == 0) || (((sc->floatTypeCode / 10) % 10) == 0)) { sc->tempLen = sprintf(sc->tempStr, "\ diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h index 1fc0aad..0df3b28 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h @@ -104,6 +104,28 @@ layout(std430, binding = %d) buffer DataOut{\n\ #endif return; } + +static inline void appendCurrentBatchVkFFT(VkFFTSpecializationConstantsLayout* sc, int id) { + if (sc->res != VKFFT_SUCCESS) return; + PfContainer* uintType32; + PfGetTypeFromCode(sc, sc->uintType32Code, &uintType32); + +#if(VKFFT_BACKEND==0) + + sc->tempLen = sprintf(sc->tempStr, "\ +layout(std140, binding = %d) uniform UniformBufferObject{\n\ + %s N;\n\ +} currentBatch;\n\n", id, uintType32->name); + PfAppendLine(sc); + +#elif(VKFFT_BACKEND==1) +#elif(VKFFT_BACKEND==2) +#elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)) +#elif(VKFFT_BACKEND==5) +#endif + return; +} + static inline void appendKernelLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc, int id) { if (sc->res != VKFFT_SUCCESS) return; PfContainer* vecType; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h index 612846f..bbfa0a0 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h @@ -90,11 +90,11 @@ static inline void appendPushConstants(VkFFTSpecializationConstantsLayout* sc) { sprintf(tempCopyStr, "consts.%s", sc->kernelOffset.name); sprintf(sc->kernelOffset.name, "%s", tempCopyStr); } - if (sc->performPostCompilationCurrentBatch) { - appendPushConstant(sc, &sc->currentBatch); - sprintf(tempCopyStr, "consts.%s", sc->currentBatch.name); - sprintf(sc->currentBatch.name, "%s", tempCopyStr); - } + // if (sc->performPostCompilationCurrentBatch) { + // appendPushConstant(sc, &sc->currentBatch); + // sprintf(tempCopyStr, "consts.%s", sc->currentBatch.name); + // sprintf(sc->currentBatch.name, "%s", tempCopyStr); + // } #if(VKFFT_BACKEND==0) sc->tempLen = sprintf(sc->tempStr, "} consts;\n\n"); PfAppendLine(sc); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h index 5d59157..aff5209 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h @@ -73,6 +73,10 @@ static inline VkFFTResult shaderGen_FFT(VkFFTSpecializationConstantsLayout* sc, id++; appendOutputLayoutVkFFT(sc, id); id++; + if (sc->dynamicBatch){ + appendCurrentBatchVkFFT(sc, id); + id++; + } if (sc->convolutionStep) { appendKernelLayoutVkFFT(sc, id); id++; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h index 6319a39..b7af56f 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h @@ -64,6 +64,10 @@ static inline VkFFTResult shaderGen_R2C_even_decomposition(VkFFTSpecializationCo id++; appendOutputLayoutVkFFT(sc, id); id++; + if (sc->dynamicBatch){ + appendCurrentBatchVkFFT(sc, id); + id++; + } if (sc->LUT) { appendLUTLayoutVkFFT(sc, id); id++; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index a3f29c5..3b35baf 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -133,11 +133,11 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); offset += sizeof(pfUINT); } - if (axis->specializationConstants.performPostCompilationCurrentBatch) { - temp = axis->specializationConstants.currentBatch.data.i; - memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); - offset += sizeof(pfUINT); - } + // if (axis->specializationConstants.performPostCompilationCurrentBatch) { + // temp = axis->specializationConstants.currentBatch.data.i; + // memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); + // offset += sizeof(pfUINT); + // } } else { pfUINT offset = 0; @@ -175,11 +175,11 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); offset += sizeof(uint32_t); } - if (axis->specializationConstants.performPostCompilationCurrentBatch) { - temp = (uint32_t)(axis->specializationConstants.currentBatch.data.i); - memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); - offset += sizeof(uint32_t); - } + // if (axis->specializationConstants.performPostCompilationCurrentBatch) { + // temp = (uint32_t)(axis->specializationConstants.currentBatch.data.i); + // memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); + // offset += sizeof(uint32_t); + // } } } dispatchSize[0] = (i == blockNumber[0] - 1) ? lastBlockSize[0] : blockSize[0]; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h index cd61e1a..d5d92c4 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h @@ -389,11 +389,11 @@ static inline VkFFTResult initParametersAPI(VkFFTApplication* app, VkFFTSpeciali PfAllocateContainerFlexible(sc, &sc->kernelOffset, 50); sprintf(sc->kernelOffset.name, "kernelOffset"); } - if (sc->performPostCompilationCurrentBatch) { - sc->currentBatch.type = 100 + sc->uintTypeCode; - PfAllocateContainerFlexible(sc, &sc->currentBatch, 50); - sprintf(sc->currentBatch.name, "currentBatch"); - } + // if (sc->performPostCompilationCurrentBatch) { + // sc->currentBatch.type = 100 + sc->uintTypeCode; + // PfAllocateContainerFlexible(sc, &sc->currentBatch, 50); + // sprintf(sc->currentBatch.name, "currentBatch"); + // } #if(VKFFT_BACKEND==0) sprintf(sc->inputsStruct.name, "inputs"); sprintf(sc->outputsStruct.name, "outputs"); @@ -635,9 +635,9 @@ static inline VkFFTResult freeParametersAPI(VkFFTApplication* app, VkFFTSpeciali if (sc->performPostCompilationKernelOffset) { PfDeallocateContainer(sc, &sc->kernelOffset); } - if (sc->performPostCompilationCurrentBatch) { - PfDeallocateContainer(sc, &sc->currentBatch); - } + // if (sc->performPostCompilationCurrentBatch) { + // PfDeallocateContainer(sc, &sc->currentBatch); + // } return res; } diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index dcf7b4d..0a25fe9 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -239,6 +239,18 @@ static inline VkFFTResult VkFFTConfigureDescriptors(VkFFTApplication* app, VkFFT VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; descriptorPoolSize.descriptorCount = (uint32_t)(axis->specializationConstants.inputBufferBlockNum + axis->specializationConstants.outputBufferBlockNum); #endif + + + if (app->configuration.dynamicBatch == 1) { + axis->specializationConstants.currentBatchBindingID = (int)axis->numBindings; + axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; +#if(VKFFT_BACKEND==0) + descriptorPoolSize.descriptorCount++; +#endif + axis->numBindings++; + } + + axis->specializationConstants.convolutionBindingID = -1; if ((axis_id == (app->configuration.FFTdim-1)) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionBindingID = (int)axis->numBindings; @@ -570,6 +582,14 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V axis->numBindings++; } + if (app->configuration.dynamicBatch == 1) { + axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; +#if(VKFFT_BACKEND==0) + descriptorPoolSize.descriptorCount++; +#endif + axis->numBindings++; + } + if (app->configuration.useLUT == 1) { axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; #if(VKFFT_BACKEND==0) @@ -1109,6 +1129,15 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } //descriptorBufferInfo.offset = 0; } + if ((i == axis->specializationConstants.currentBatchBindingID) &&(app->configuration.dynamicBatch)){ +#if(VKFFT_BACKEND==0) + if (axis->specializationConstants.performBufferSetUpdate) { + descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; + } +#endif + } if ((i == axis->specializationConstants.convolutionBindingID) && (app->configuration.performConvolution)) { if (axis->specializationConstants.performBufferSetUpdate) { pfUINT bufferId = 0; @@ -1544,6 +1573,15 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; } } + if ((i == 3) && (app->configuration.dynamicBatch == 1)) { +#if(VKFFT_BACKEND==0) + if (axis->specializationConstants.performBufferSetUpdate) { + descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; + } +#endif + } if ((i == axis->numBindings - 1) && (app->configuration.useLUT == 1)) { #if(VKFFT_BACKEND==0) if (axis->specializationConstants.performBufferSetUpdate) { diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h index ba9805c..70dd684 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h @@ -425,7 +425,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla if (app->configuration.specifyOffsetsAtLaunch) { axis->specializationConstants.performPostCompilationInputOffset = 1; axis->specializationConstants.performPostCompilationOutputOffset = 1; - axis->specializationConstants.performPostCompilationCurrentBatch = 1; + //axis->specializationConstants.performPostCompilationCurrentBatch = 1; if (app->configuration.performConvolution) axis->specializationConstants.performPostCompilationKernelOffset = 1; @@ -437,8 +437,8 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->specializationConstants.outputOffset.data.i = app->configuration.outputBufferOffset; axis->specializationConstants.kernelOffset.type = 31; axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; - axis->specializationConstants.currentBatch.type = 31; - axis->specializationConstants.currentBatch.data.i = app->configuration.numberBatches; + //axis->specializationConstants.currentBatch.type = 31; + //axis->specializationConstants.currentBatch.data.i = app->configuration.numberBatches; } resFFT = VkFFTCheckUpdateBufferSet(app, axis, 1, 0); @@ -563,6 +563,9 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla else axis->specializationConstants.zeropad[1] = 0; } + if (app->configuration.dynamicBatch){ + axis->specializationConstants.dynamicBatch = 1; + } if ((app->configuration.FFTdim - 1 == axis_id) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionStep = 1; } @@ -676,10 +679,10 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->pushConstants.performPostCompilationKernelOffset = 1; axis->pushConstants.structSize += 1; } - if (axis->specializationConstants.performPostCompilationCurrentBatch) { - axis->pushConstants.performPostCompilationCurrentBatch = 1; - axis->pushConstants.structSize += 1; - } + // if (axis->specializationConstants.performPostCompilationCurrentBatch) { + // axis->pushConstants.performPostCompilationCurrentBatch = 1; + // axis->pushConstants.structSize += 1; + // } if (app->configuration.useUint64) axis->pushConstants.structSize *= sizeof(pfUINT); else diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h index 76b7bab..d56490e 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h @@ -137,7 +137,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* if (app->configuration.specifyOffsetsAtLaunch) { axis->specializationConstants.performPostCompilationInputOffset = 1; axis->specializationConstants.performPostCompilationOutputOffset = 1; - axis->specializationConstants.performPostCompilationCurrentBatch = 1; + //axis->specializationConstants.performPostCompilationCurrentBatch = 1; if (app->configuration.performConvolution) axis->specializationConstants.performPostCompilationKernelOffset = 1; @@ -258,6 +258,9 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* else axis->specializationConstants.zeropad[1] = 0; }*/ + if (app->configuration.dynamicBatch){ + axis->specializationConstants.dynamicBatch = 1; + } if ((app->configuration.FFTdim == 1) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionStep = 1; } @@ -291,10 +294,10 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* axis->pushConstants.performPostCompilationKernelOffset = 1; axis->pushConstants.structSize += 1; } - if (axis->specializationConstants.performPostCompilationCurrentBatch) { - axis->pushConstants.performPostCompilationCurrentBatch = 1; - axis->pushConstants.structSize += 1; - } + // if (axis->specializationConstants.performPostCompilationCurrentBatch) { + // axis->pushConstants.performPostCompilationCurrentBatch = 1; + // axis->pushConstants.structSize += 1; + // } if (app->configuration.useUint64) axis->pushConstants.structSize *= sizeof(pfUINT); else diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index 25e52c1..c28e58e 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -143,6 +143,7 @@ typedef struct { pfUINT* inputBufferSize;//array of input buffers sizes in bytes, if isInputFormatted is enabled pfUINT* outputBufferSize;//array of output buffers sizes in bytes, if isOutputFormatted is enabled pfUINT* kernelSize;//array of kernel buffers sizes in bytes, if performConvolution is enabled + pfUINT currentBatchUBOSize; #if(VKFFT_BACKEND==0) VkBuffer* buffer;//pointer to array of buffers (or one buffer) used for computations @@ -150,6 +151,8 @@ typedef struct { VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled + VkBuffer currentBatchUBO; + #elif(VKFFT_BACKEND==1) void** buffer;//pointer to device buffer used for computations void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation @@ -321,6 +324,7 @@ typedef struct { MTL::CommandBuffer* commandBuffer;//Filled at app execution MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution #endif + pfUINT dynamicBatch; pfUINT dirkKernelCounter; const char* dirkName; pfUINT dirkTypeFFT; @@ -789,7 +793,7 @@ typedef struct { PfContainer inputOffset; PfContainer kernelOffset; PfContainer outputOffset; - PfContainer currentBatch; + //PfContainer currentBatch; int reorderFourStep; int storeSharedComplexComponentsSeparately; int pushConstantsStructSize; @@ -797,7 +801,7 @@ typedef struct { int performPostCompilationInputOffset; int performPostCompilationOutputOffset; int performPostCompilationKernelOffset; - int performPostCompilationCurrentBatch; + //int performPostCompilationCurrentBatch; pfUINT inputBufferBlockNum; pfUINT inputBufferBlockSize; pfUINT outputBufferBlockNum; @@ -822,6 +826,7 @@ typedef struct { int swapComputeWorkGroupID; int convolutionStep; + int dynamicBatch; int symmetricKernel; int supportAxis; int cacheShuffle; @@ -843,6 +848,7 @@ typedef struct { int forceCallbackVersionRealTransforms; int numBuffersBound[10]; + int currentBatchBindingID; int convolutionBindingID; int LUTBindingID; int BluesteinConvolutionBindingID; @@ -1005,6 +1011,7 @@ typedef struct { //int outputType; PfContainer inputsStruct; PfContainer outputsStruct; + PfContainer batchStruct; PfContainer kernelStruct; PfContainer sdataStruct; PfContainer LUTStruct; @@ -1038,8 +1045,8 @@ typedef struct { pfUINT performPostCompilationKernelOffset; pfUINT kernelOffset; - pfUINT performPostCompilationCurrentBatch; - pfUINT currentBatch; + //pfUINT performPostCompilationCurrentBatch; + //pfUINT currentBatch; pfUINT structSize; } VkFFTPushConstantsLayout; From aed07ddc209ef50acf4140d190814b917c90c003 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Wed, 19 Feb 2025 22:39:39 +0100 Subject: [PATCH 05/18] currentBatch limitable through uniform buf --- .../vkFFT_KernelStartEnd.h | 4 +- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 76 ++++++++++++++----- .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 64 ++++++++++++++-- 3 files changed, 115 insertions(+), 29 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h index f16f95e..ab839f7 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h @@ -49,7 +49,7 @@ static inline void appendKernelStart(VkFFTSpecializationConstantsLayout* sc, int //DvdB if (sc->dynamicBatch){ - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y > currentBatch.N) return;\n"); + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.N) return;\n"); PfAppendLine(sc); } #elif(VKFFT_BACKEND==1) @@ -282,7 +282,7 @@ static inline void appendKernelStart_R2C(VkFFTSpecializationConstantsLayout* sc, //DvdB if (sc->dynamicBatch){ - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z > currentBatch.N) return;\n"); + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.N) return;\n"); PfAppendLine(sc); } #elif(VKFFT_BACKEND==1) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 2068647..8c94826 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -23,8 +23,9 @@ #define VKFFT_COMPILEKERNEL_H #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" #include //DvdB +#include //DvdB #include //DvdB - +using namespace std; static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* axis) { #if(VKFFT_BACKEND==0) @@ -164,22 +165,22 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* glslang_target_client_version_t client_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_VULKAN_1_1 : GLSLANG_TARGET_VULKAN_1_0; glslang_target_language_version_t target_language_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_SPV_1_3 : GLSLANG_TARGET_SPV_1_0; - glslang_input_t input = - { - GLSLANG_SOURCE_GLSL, - GLSLANG_STAGE_COMPUTE, - GLSLANG_CLIENT_VULKAN, - client_version, - GLSLANG_TARGET_SPV, - target_language_version, - code0, - 450, - GLSLANG_NO_PROFILE, - 1, - 0, - GLSLANG_MSG_DEFAULT_BIT, - (const glslang_resource_t*)&default_resource, - }; + //glslang_input_t input = + // { + // GLSLANG_SOURCE_GLSL, + // GLSLANG_STAGE_COMPUTE, + // GLSLANG_CLIENT_VULKAN, + // client_version, + // GLSLANG_TARGET_SPV, + // target_language_version, + // code0, + // 450, + // GLSLANG_NO_PROFILE, + // 1, + // 0, + // GLSLANG_MSG_DEFAULT_BIT, + // (const glslang_resource_t*)&default_resource, + // }; //DvdB @@ -194,20 +195,53 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* //fname += ".txt"; fname += std::to_string(app->configuration.dirkKernelCounter) +".txt"; + + + + // if (fname=="FFT1_kernel_3.txt"){ + // ifstream file("FFT1_kernel_3b.txt", ios::in | ios::binary | ios::ate); + + // unsigned int fileSize = file.tellg(); + // file.seekg(0, ios::beg); + + // char* buffer = new char[fileSize + 1]; + // file.read(buffer, fileSize); + // buffer[fileSize] = '\0'; + + // file.close(); + + // code0 = buffer; + + // } + myfile.open(fname); //app->configuration.dirkName = "DIRKDIRK"; //myfile << app->configuration.dirkName; myfile << "// kernel type: " << app->configuration.dirkTypeFFT <configuration.dirkKernelCounter++; - - - + glslang_input_t input = + { + GLSLANG_SOURCE_GLSL, + GLSLANG_STAGE_COMPUTE, + GLSLANG_CLIENT_VULKAN, + client_version, + GLSLANG_TARGET_SPV, + target_language_version, + code0, + 450, + GLSLANG_NO_PROFILE, + 1, + 0, + GLSLANG_MSG_DEFAULT_BIT, + (const glslang_resource_t*)&default_resource, + }; //printf("%s\n", code0); glslang_shader_t* shader = glslang_shader_create((const glslang_input_t*)&input); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index 0a25fe9..840525d 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -235,6 +235,8 @@ static inline VkFFTResult VkFFTConfigureDescriptors(VkFFTApplication* app, VkFFT axis->specializationConstants.numBuffersBound[1] = (int)axis->specializationConstants.outputBufferBlockNum; axis->specializationConstants.numBuffersBound[2] = 0; axis->specializationConstants.numBuffersBound[3] = 0; + axis->specializationConstants.numBuffersBound[4] = 0; + #if(VKFFT_BACKEND==0) VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; descriptorPoolSize.descriptorCount = (uint32_t)(axis->specializationConstants.inputBufferBlockNum + axis->specializationConstants.outputBufferBlockNum); @@ -569,20 +571,25 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V axis->specializationConstants.numBuffersBound[1] = (int)axis->specializationConstants.outputBufferBlockNum; axis->specializationConstants.numBuffersBound[2] = 0; axis->specializationConstants.numBuffersBound[3] = 0; + axis->specializationConstants.numBuffersBound[4] = 0; + #if(VKFFT_BACKEND==0) VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; descriptorPoolSize.descriptorCount = (uint32_t)(axis->specializationConstants.numBuffersBound[0] + axis->specializationConstants.numBuffersBound[1]); #endif + if ((axis_id == (app->configuration.FFTdim-1)) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { + axis->specializationConstants.convolutionBindingID = (int)axis->numBindings; axis->specializationConstants.numBuffersBound[axis->numBindings] = (int)axis->specializationConstants.kernelBlockNum; #if(VKFFT_BACKEND==0) descriptorPoolSize.descriptorCount += (uint32_t)axis->specializationConstants.kernelBlockNum; #endif axis->numBindings++; } - + if (app->configuration.dynamicBatch == 1) { + axis->specializationConstants.currentBatchBindingID = (int)axis->numBindings; axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; #if(VKFFT_BACKEND==0) descriptorPoolSize.descriptorCount++; @@ -591,12 +598,17 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V } if (app->configuration.useLUT == 1) { + axis->specializationConstants.LUTBindingID = (int)axis->numBindings; axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; #if(VKFFT_BACKEND==0) descriptorPoolSize.descriptorCount++; #endif axis->numBindings++; } + + + + #if(VKFFT_BACKEND==0) VkResult res = VK_SUCCESS; VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; @@ -799,7 +811,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* axis->specializationConstants.outputOffset.type = 31; axis->specializationConstants.kernelOffset.type = 31; #if(VKFFT_BACKEND==0) - const VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + VkDescriptorType descriptorType; #endif for (pfUINT i = 0; i < axis->numBindings; ++i) { for (pfUINT j = 0; j < axis->specializationConstants.numBuffersBound[i]; ++j) { @@ -807,6 +819,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* VkDescriptorBufferInfo descriptorBufferInfo = { 0 }; #endif if (i == 0) { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if ((axis_upload_id == FFTPlan->numAxisUploads[axis_id] - 1) && (app->configuration.isInputFormatted) && (!axis->specializationConstants.reverseBluesteinMultiUpload) && ( ((axis_id == app->firstAxis) && (!inverse)) || ((axis_id == app->lastAxis) && (inverse) && (!((axis_id == 0) && (axis->specializationConstants.performR2CmultiUpload))) && (!app->configuration.performConvolution) && (!app->configuration.inverseReturnToInputBuffer))) @@ -953,6 +966,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* //descriptorBufferInfo.offset = 0; } if (i == 1) { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (((axis_upload_id == 0) && (!app->useBluesteinFFT[axis_id]) && (app->configuration.isOutputFormatted && ( ((axis_id == app->firstAxis) && (inverse)) || ((axis_id == app->lastAxis) && (!inverse) && (!app->configuration.performConvolution)) @@ -1131,7 +1145,22 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.currentBatchBindingID) &&(app->configuration.dynamicBatch)){ #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + if (axis->specializationConstants.performBufferSetUpdate) { + + const char* dname = app->configuration.dirkName; + ofstream myfile; + std::string fname = ""; + + fname += dname ; + fname += "_update1.txt"; + + myfile.open(fname); + myfile << "test"; + myfile.close(); + + descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; descriptorBufferInfo.offset = 0; descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; @@ -1139,6 +1168,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* #endif } if ((i == axis->specializationConstants.convolutionBindingID) && (app->configuration.performConvolution)) { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { pfUINT bufferId = 0; pfUINT offset = j; @@ -1166,6 +1196,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT == 1)) { #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = axis->bufferLUT; descriptorBufferInfo.offset = 0; @@ -1175,6 +1206,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.RaderUintLUTBindingID) && (axis->specializationConstants.raderUintLUT)) { #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = axis->bufferRaderUintLUT; descriptorBufferInfo.offset = 0; @@ -1184,6 +1216,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.BluesteinConvolutionBindingID) && (app->useBluesteinFFT[axis_id]) && (axis_upload_id == 0)) { #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { if (axis->specializationConstants.inverseBluestein) descriptorBufferInfo.buffer = app->bufferBluesteinIFFT[axis_id]; @@ -1196,6 +1229,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.BluesteinMultiplicationBindingID) && (app->useBluesteinFFT[axis_id]) && (axis_upload_id == (FFTPlan->numAxisUploads[axis_id] - 1))) { #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = app->bufferBluestein[axis_id]; descriptorBufferInfo.offset = 0; @@ -1229,7 +1263,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { if (axis->specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { #if(VKFFT_BACKEND==0) - const VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + VkDescriptorType descriptorType; #endif for (pfUINT i = 0; i < axis->numBindings; ++i) { for (pfUINT j = 0; j < axis->specializationConstants.numBuffersBound[i]; ++j) { @@ -1237,6 +1271,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA VkDescriptorBufferInfo descriptorBufferInfo = { 0 }; #endif if (i == 0) { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (inverse) { if ((axis_upload_id == FFTPlan->numAxisUploads[axis_id] - 1) && (app->configuration.isInputFormatted) && (!axis->specializationConstants.reverseBluesteinMultiUpload) && ( ((axis_id == app->firstAxis) && (!inverse)) @@ -1395,6 +1430,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA } } if (i == 1) { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (inverse) { if ((axis_upload_id == 0) && (app->configuration.numberKernels > 1) && (inverse) && (!app->configuration.performConvolution)) { if (axis->specializationConstants.performBufferSetUpdate) { @@ -1547,7 +1583,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA } } } - if ((i == 2) && (app->configuration.performConvolution)) { + if ((i == axis->specializationConstants.convolutionBindingID) && (app->configuration.performConvolution)) { + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { pfUINT bufferId = 0; pfUINT offset = j; @@ -1567,23 +1604,38 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.kernel[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.kernelBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.kernelBlockSize); + #endif } if (axis->specializationConstants.performOffsetUpdate) { axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; } } - if ((i == 3) && (app->configuration.dynamicBatch == 1)) { + if ((i == axis->specializationConstants.currentBatchBindingID) && (app->configuration.dynamicBatch == 1)) { #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { + + const char* dname2 = app->configuration.dirkName; + ofstream myfile2; + std::string fname2 = ""; + + fname2 += dname2 ; + fname2 += "_update2.txt"; + + myfile2.open(fname2); + myfile2 << "test"; + myfile2.close(); + descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; descriptorBufferInfo.offset = 0; descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; } #endif } - if ((i == axis->numBindings - 1) && (app->configuration.useLUT == 1)) { + if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT == 1)) { #if(VKFFT_BACKEND==0) + descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = axis->bufferLUT; descriptorBufferInfo.offset = 0; From 34008da316d9477c8e6568fed674da3a17364c17 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Thu, 20 Feb 2025 12:21:12 +0100 Subject: [PATCH 06/18] add offset to currentBatch --- vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h | 1 + .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 4 ++-- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index d7a66ae..80a22be 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -1178,6 +1178,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf if (inputLaunchConfiguration.currentBatchUBOSize !=0){ app->configuration.currentBatchUBOSize = inputLaunchConfiguration.currentBatchUBOSize; app->configuration.currentBatchUBO = inputLaunchConfiguration.currentBatchUBO; + app->configuration.currentBatchUBOOffset = inputLaunchConfiguration.currentBatchUBOOffset; } if (inputLaunchConfiguration.bufferOffset != 0) app->configuration.bufferOffset = inputLaunchConfiguration.bufferOffset; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index 840525d..9ddb9f1 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -1162,7 +1162,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; - descriptorBufferInfo.offset = 0; + descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; } #endif @@ -1628,7 +1628,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA myfile2.close(); descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; - descriptorBufferInfo.offset = 0; + descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; } #endif diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index c28e58e..8fd5191 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -189,6 +189,7 @@ typedef struct { pfUINT inputBufferOffset;//specify if VkFFT has to offset the first element position inside the input buffer. In bytes. Default 0 pfUINT outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0 pfUINT kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0 + pfUINT currentBatchUBOOffset; pfUINT specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0 //optional: (default 0 if not stated otherwise) From 124c0449e9097d784f96ba639e6d9d8b0757492e Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Fri, 21 Feb 2025 18:05:07 +0100 Subject: [PATCH 07/18] dynamicBatch=2 for separate fwd/inv batch number --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 2 +- .../vkFFT_KernelStartEnd.h | 28 +++++++++++++++++-- .../vkFFT_KernelsLevel0/vkFFT_KernelUtils.h | 2 +- .../vkFFT_InputOutputLayout.h | 14 ++++++++-- .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 12 ++++---- .../vkFFT_Plans/vkFFT_Plan_FFT.h | 2 +- .../vkFFT_Plans/vkFFT_Plan_R2C.h | 2 +- 7 files changed, 47 insertions(+), 15 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 80a22be..355959b 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -967,7 +967,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf #endif //DvdB - if (inputLaunchConfiguration.dynamicBatch != 0) app->configuration.dynamicBatch = 1; + if (inputLaunchConfiguration.dynamicBatch != 0) app->configuration.dynamicBatch = inputLaunchConfiguration.dynamicBatch; app->configuration.dirkKernelCounter = 0; app->configuration.dirkDispatchCounter = 0; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h index ab839f7..3ffb424 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h @@ -48,10 +48,22 @@ static inline void appendKernelStart(VkFFTSpecializationConstantsLayout* sc, int PfAppendLine(sc); //DvdB - if (sc->dynamicBatch){ + if (sc->dynamicBatch == 1){ sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.N) return;\n"); PfAppendLine(sc); } + else if (sc->dynamicBatch == 2) + { + if (sc->inverse == 1){ + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.Ninv) return;\n"); + PfAppendLine(sc); + } + else + { + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.Nfwd) return;\n"); + PfAppendLine(sc); + } + } #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern __shared__ float shared[];\n"); PfAppendLine(sc); @@ -281,10 +293,22 @@ static inline void appendKernelStart_R2C(VkFFTSpecializationConstantsLayout* sc, PfAppendLine(sc); //DvdB - if (sc->dynamicBatch){ + if (sc->dynamicBatch == 1){ sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.N) return;\n"); PfAppendLine(sc); } + else if (sc->dynamicBatch == 2) + { + if (sc->inverse == 1){ + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.Ninv) return;\n"); + PfAppendLine(sc); + } + else + { + sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.Nfwd) return;\n"); + PfAppendLine(sc); + } + } #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern \"C\" __global__ void __launch_bounds__(%" PRIi64 ") VkFFT_main_R2C ", sc->localSize[0].data.i * sc->localSize[1].data.i * sc->localSize[2].data.i); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h index c984b27..e0cb4f6 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h @@ -80,7 +80,7 @@ static inline void appendExtensions(VkFFTSpecializationConstantsLayout* sc) { sc->tempLen = sprintf(sc->tempStr, "#extension GL_EXT_shader_16bit_storage : require\n\n"); PfAppendLine(sc); } - if (sc->dynamicBatch) { + if (sc->dynamicBatch) { //Allow for std430 uniform blocks sc->tempLen = sprintf(sc->tempStr, "#extension GL_EXT_scalar_block_layout : enable\n\n"); PfAppendLine(sc); } diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h index 0df3b28..e947813 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h @@ -111,11 +111,19 @@ static inline void appendCurrentBatchVkFFT(VkFFTSpecializationConstantsLayout* s PfGetTypeFromCode(sc, sc->uintType32Code, &uintType32); #if(VKFFT_BACKEND==0) - + if (sc->dynamicBatch==1){ sc->tempLen = sprintf(sc->tempStr, "\ -layout(std140, binding = %d) uniform UniformBufferObject{\n\ +layout(std430, binding = %d) uniform UniformBufferObject{\n\ %s N;\n\ -} currentBatch;\n\n", id, uintType32->name); + } currentBatch;\n\n", id, uintType32->name); + } + else if (sc->dynamicBatch==2){ + sc->tempLen = sprintf(sc->tempStr, "\ +layout(std430, binding = %d) uniform UniformBufferObject{\n\ + %s Nfwd;\n\ + %s Ninv;\n\ + } currentBatch;\n\n", id, uintType32->name, uintType32->name); + } PfAppendLine(sc); #elif(VKFFT_BACKEND==1) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index 9ddb9f1..29c0b87 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -243,7 +243,7 @@ static inline VkFFTResult VkFFTConfigureDescriptors(VkFFTApplication* app, VkFFT #endif - if (app->configuration.dynamicBatch == 1) { + if (app->configuration.dynamicBatch >= 1) { axis->specializationConstants.currentBatchBindingID = (int)axis->numBindings; axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; #if(VKFFT_BACKEND==0) @@ -588,7 +588,7 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V axis->numBindings++; } - if (app->configuration.dynamicBatch == 1) { + if (app->configuration.dynamicBatch >= 1) { axis->specializationConstants.currentBatchBindingID = (int)axis->numBindings; axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; #if(VKFFT_BACKEND==0) @@ -1143,7 +1143,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } //descriptorBufferInfo.offset = 0; } - if ((i == axis->specializationConstants.currentBatchBindingID) &&(app->configuration.dynamicBatch)){ + if ((i == axis->specializationConstants.currentBatchBindingID) && (app->configuration.dynamicBatch)){ #if(VKFFT_BACKEND==0) descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; @@ -1163,7 +1163,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; - descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; + descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; } #endif } @@ -1611,7 +1611,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; } } - if ((i == axis->specializationConstants.currentBatchBindingID) && (app->configuration.dynamicBatch == 1)) { + if ((i == axis->specializationConstants.currentBatchBindingID) && (app->configuration.dynamicBatch)) { #if(VKFFT_BACKEND==0) descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { @@ -1629,7 +1629,7 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; - descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; + descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; } #endif } diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h index 70dd684..0ce596d 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h @@ -564,7 +564,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->specializationConstants.zeropad[1] = 0; } if (app->configuration.dynamicBatch){ - axis->specializationConstants.dynamicBatch = 1; + axis->specializationConstants.dynamicBatch = app->configuration.dynamicBatch; } if ((app->configuration.FFTdim - 1 == axis_id) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionStep = 1; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h index d56490e..9ce66a0 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h @@ -259,7 +259,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* axis->specializationConstants.zeropad[1] = 0; }*/ if (app->configuration.dynamicBatch){ - axis->specializationConstants.dynamicBatch = 1; + axis->specializationConstants.dynamicBatch = app->configuration.dynamicBatch; } if ((app->configuration.FFTdim == 1) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionStep = 1; From abf2e050775cc102def7670c6e6c0f84584c2cc6 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Fri, 21 Feb 2025 19:12:20 +0100 Subject: [PATCH 08/18] remove debug lines to prepare for pr --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 7 -- .../vkFFT_KernelStartEnd.h | 2 - .../vkFFT_InputOutputLayout.h | 16 +---- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 64 ------------------- .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 21 ------ .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 25 -------- .../vkFFT_Plans/vkFFT_Plan_FFT.h | 1 - .../vkFFT_Plans/vkFFT_Plan_R2C.h | 1 - vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 20 ++---- 9 files changed, 7 insertions(+), 150 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 355959b..bb3a0a0 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -966,15 +966,8 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf compileOptions->release(); #endif -//DvdB if (inputLaunchConfiguration.dynamicBatch != 0) app->configuration.dynamicBatch = inputLaunchConfiguration.dynamicBatch; - app->configuration.dirkKernelCounter = 0; - app->configuration.dirkDispatchCounter = 0; - if (inputLaunchConfiguration.dirkName != nullptr){ - app->configuration.dirkName = inputLaunchConfiguration.dirkName; - } - resFFT = initializeBluesteinAutoPadding(app); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h index 3ffb424..0ce7210 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h @@ -47,7 +47,6 @@ static inline void appendKernelStart(VkFFTSpecializationConstantsLayout* sc, int sc->tempLen = sprintf(sc->tempStr, "void main() {\n"); PfAppendLine(sc); - //DvdB if (sc->dynamicBatch == 1){ sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.N) return;\n"); PfAppendLine(sc); @@ -292,7 +291,6 @@ static inline void appendKernelStart_R2C(VkFFTSpecializationConstantsLayout* sc, sc->tempLen = sprintf(sc->tempStr, "void main() {\n"); PfAppendLine(sc); - //DvdB if (sc->dynamicBatch == 1){ sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.N) return;\n"); PfAppendLine(sc); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h index e947813..7517dff 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h @@ -41,16 +41,10 @@ static inline void appendInputLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc int typeSize = ((sc->inputMemoryCode % 10) == 3) ? sc->complexSize : sc->complexSize / 2; #if(VKFFT_BACKEND==0) if (sc->inputBufferBlockNum == 1) { -/* sc->tempLen = sprintf(sc->tempStr, "\ -layout(std430, binding = %d) buffer DataIn{\n\ - %s inputs[%" PRIu64 "];\n\ -};\n\n", id, inputMemoryType->name, sc->inputBufferBlockSize / typeSize); -*/ -//DvdB sc->tempLen = sprintf(sc->tempStr, "\ layout(std430, binding = %d) buffer DataIn{\n\ %s inputs[];\n\ -};\n\n", id, inputMemoryType->name); +};\n\n", id, inputMemoryType->name); // use runtime-sized arrays so that the same shader can be reused for different batch numbers PfAppendLine(sc); @@ -76,16 +70,10 @@ static inline void appendOutputLayoutVkFFT(VkFFTSpecializationConstantsLayout* s int typeSize = ((sc->outputMemoryCode % 10) == 3) ? sc->complexSize : sc->complexSize / 2; #if(VKFFT_BACKEND==0) if (sc->inputBufferBlockNum == 1) { -/* sc->tempLen = sprintf(sc->tempStr, "\ -layout(std430, binding = %d) buffer DataOut{\n\ - %s outputs[%" PRIu64 "];\n\ -};\n\n", id, outputMemoryType->name, sc->outputBufferBlockSize / typeSize); -*/ -//DvdB sc->tempLen = sprintf(sc->tempStr, "\ layout(std430, binding = %d) buffer DataOut{\n\ %s outputs[];\n\ -};\n\n", id, outputMemoryType->name); +};\n\n", id, outputMemoryType->name); // use runtime-sized arrays so that the same shader can be reused for different batch numbers PfAppendLine(sc); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 8c94826..276ad34 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -22,9 +22,6 @@ #ifndef VKFFT_COMPILEKERNEL_H #define VKFFT_COMPILEKERNEL_H #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" -#include //DvdB -#include //DvdB -#include //DvdB using namespace std; static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* axis) { @@ -165,67 +162,6 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* glslang_target_client_version_t client_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_VULKAN_1_1 : GLSLANG_TARGET_VULKAN_1_0; glslang_target_language_version_t target_language_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_SPV_1_3 : GLSLANG_TARGET_SPV_1_0; - //glslang_input_t input = - // { - // GLSLANG_SOURCE_GLSL, - // GLSLANG_STAGE_COMPUTE, - // GLSLANG_CLIENT_VULKAN, - // client_version, - // GLSLANG_TARGET_SPV, - // target_language_version, - // code0, - // 450, - // GLSLANG_NO_PROFILE, - // 1, - // 0, - // GLSLANG_MSG_DEFAULT_BIT, - // (const glslang_resource_t*)&default_resource, - // }; - - - //DvdB - - const char* dname = app->configuration.dirkName; - //const char* teststr = "HELLOOOOOOO"; - ofstream myfile; - std::string fname = ""; - - fname += dname ; - fname += "_kernel_"; - //fname += ".txt"; - fname += std::to_string(app->configuration.dirkKernelCounter) +".txt"; - - - - - // if (fname=="FFT1_kernel_3.txt"){ - // ifstream file("FFT1_kernel_3b.txt", ios::in | ios::binary | ios::ate); - - // unsigned int fileSize = file.tellg(); - // file.seekg(0, ios::beg); - - // char* buffer = new char[fileSize + 1]; - // file.read(buffer, fileSize); - // buffer[fileSize] = '\0'; - - // file.close(); - - // code0 = buffer; - - // } - - myfile.open(fname); - //app->configuration.dirkName = "DIRKDIRK"; - //myfile << app->configuration.dirkName; - myfile << "// kernel type: " << app->configuration.dirkTypeFFT <configuration.dirkKernelCounter++; - - - glslang_input_t input = { GLSLANG_SOURCE_GLSL, diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index 3b35baf..a214e2a 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -59,27 +59,6 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a axis->updatePushConstants = 1; } - - - const char* dname = app->configuration.dirkName; - ofstream myfile; - std::string fname = ""; - - fname += dname ; - fname += "_dispatch_"; - fname += std::to_string(app->configuration.dirkDispatchCounter); - fname += ".txt"; - - myfile.open(fname); - myfile << "Block0: "<< dispatchBlock[0] <configuration.dirkDispatchCounter++; - - //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", dispatchBlock[0], dispatchBlock[1], dispatchBlock[2]); //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", blockNumber[0], blockNumber[1], blockNumber[2]); for (pfUINT i = 0; i < 3; i++) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index 29c0b87..e607baa 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -1148,19 +1148,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { - - const char* dname = app->configuration.dirkName; - ofstream myfile; - std::string fname = ""; - - fname += dname ; - fname += "_update1.txt"; - - myfile.open(fname); - myfile << "test"; - myfile.close(); - - descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; @@ -1615,18 +1602,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA #if(VKFFT_BACKEND==0) descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { - - const char* dname2 = app->configuration.dirkName; - ofstream myfile2; - std::string fname2 = ""; - - fname2 += dname2 ; - fname2 += "_update2.txt"; - - myfile2.open(fname2); - myfile2 << "test"; - myfile2.close(); - descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h index 0ce596d..c4e7bad 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h @@ -781,7 +781,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla deleteVkFFT(app); return resFFT; } - app->configuration.dirkTypeFFT = 100000 + 1000*axis_id + 100*axis_upload_id+10*inverse+reverseBluesteinMultiUpload; resFFT = VkFFT_CompileKernel(app, axis); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h index 9ce66a0..c59be43 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h @@ -367,7 +367,6 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* deleteVkFFT(app); return resFFT; } - app->configuration.dirkTypeFFT = 200000; resFFT = VkFFT_CompileKernel(app, axis); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index 8fd5191..af47631 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -143,15 +143,15 @@ typedef struct { pfUINT* inputBufferSize;//array of input buffers sizes in bytes, if isInputFormatted is enabled pfUINT* outputBufferSize;//array of output buffers sizes in bytes, if isOutputFormatted is enabled pfUINT* kernelSize;//array of kernel buffers sizes in bytes, if performConvolution is enabled - pfUINT currentBatchUBOSize; - + pfUINT currentBatchUBOSize; //size of the uniform buffer containing the number of batches to perform. Set to 4 if dynamicBatch=1 and set to 8 if dynamicBatch=2. + #if(VKFFT_BACKEND==0) VkBuffer* buffer;//pointer to array of buffers (or one buffer) used for computations VkBuffer* tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same sum size or bigger as buffer (can be split in multiple). Default 0. Setting to non zero value enables manual user allocation VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled - VkBuffer currentBatchUBO; + VkBuffer currentBatchUBO; //uniform buffer that contains the number of batches to perform when dynamicBatch >= 1. #elif(VKFFT_BACKEND==1) void** buffer;//pointer to device buffer used for computations @@ -189,7 +189,7 @@ typedef struct { pfUINT inputBufferOffset;//specify if VkFFT has to offset the first element position inside the input buffer. In bytes. Default 0 pfUINT outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0 pfUINT kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0 - pfUINT currentBatchUBOOffset; + pfUINT currentBatchUBOOffset; //byte offset of the number of batches in the within the currentBatchUBO pfUINT specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0 //optional: (default 0 if not stated otherwise) @@ -325,12 +325,7 @@ typedef struct { MTL::CommandBuffer* commandBuffer;//Filled at app execution MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution #endif - pfUINT dynamicBatch; - pfUINT dirkKernelCounter; - const char* dirkName; - pfUINT dirkTypeFFT; - pfUINT dirkCurAxis; - pfUINT dirkDispatchCounter; + pfUINT dynamicBatch; //set to 1 to dynamically limit the number of batches using the currentBatchUBO buffer. set to 2 for different numbers for fwd and inv. } VkFFTConfiguration;//parameters specified at plan creation typedef struct { @@ -794,7 +789,6 @@ typedef struct { PfContainer inputOffset; PfContainer kernelOffset; PfContainer outputOffset; - //PfContainer currentBatch; int reorderFourStep; int storeSharedComplexComponentsSeparately; int pushConstantsStructSize; @@ -802,7 +796,6 @@ typedef struct { int performPostCompilationInputOffset; int performPostCompilationOutputOffset; int performPostCompilationKernelOffset; - //int performPostCompilationCurrentBatch; pfUINT inputBufferBlockNum; pfUINT inputBufferBlockSize; pfUINT outputBufferBlockNum; @@ -1046,9 +1039,6 @@ typedef struct { pfUINT performPostCompilationKernelOffset; pfUINT kernelOffset; - //pfUINT performPostCompilationCurrentBatch; - //pfUINT currentBatch; - pfUINT structSize; } VkFFTPushConstantsLayout; From d293f15c26b46cdabf72f637bdea7ef2c95cd3a4 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sat, 22 Feb 2025 19:10:49 +0100 Subject: [PATCH 09/18] indirect dispatch --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 7 ++++++- .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 19 +++++++++++++++++-- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 6 +++++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index bb3a0a0..c91b120 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -967,7 +967,12 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf #endif if (inputLaunchConfiguration.dynamicBatch != 0) app->configuration.dynamicBatch = inputLaunchConfiguration.dynamicBatch; - + if (inputLaunchConfiguration.indirectDispatch != 0) { + app->configuration.indirectDispatch = inputLaunchConfiguration.indirectDispatch; + app->configuration.indirectBuffer = inputLaunchConfiguration.indirectBuffer; + app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset; + app->configuration.indirectHostPointer = inputLaunchConfiguration.indirectHostPointer; + } resFFT = initializeBluesteinAutoPadding(app); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index a214e2a..f4b81ab 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -62,7 +62,7 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", dispatchBlock[0], dispatchBlock[1], dispatchBlock[2]); //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", blockNumber[0], blockNumber[1], blockNumber[2]); for (pfUINT i = 0; i < 3; i++) - if (blockNumber[i] == 1) blockSize[i] = dispatchBlock[i]; + if (blockNumber[i] == 1) blockSize[i] = dispatchBlock[i]; for (pfUINT i = 0; i < blockNumber[0]; i++) { for (pfUINT j = 0; j < blockNumber[1]; j++) { for (pfUINT k = 0; k < blockNumber[2]; k++) { @@ -168,7 +168,22 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a if (axis->pushConstants.structSize > 0) { vkCmdPushConstants(app->configuration.commandBuffer[0], axis->pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, (uint32_t)axis->pushConstants.structSize, axis->pushConstants.data); } - vkCmdDispatch(app->configuration.commandBuffer[0], (uint32_t)dispatchSize[0], (uint32_t)dispatchSize[1], (uint32_t)dispatchSize[2]); + bool indirect_dispatch = (app->configuration.indirectDispatch && app->configuration.indirectHostPointer != nullptr); + pfUINT indirect_offset; + //auto sc = axis->specializationConstants.inverse; + if (indirect_dispatch){ + unsigned int* host_indirect = (unsigned int*)((char*)app->configuration.indirectHostPointer + app->configuration.indirectBufferOffset + app->indirectDispatchID*16); + host_indirect[0] = (uint32_t)dispatchSize[0]; + host_indirect[1] = (uint32_t)dispatchSize[1]; + host_indirect[2] = (uint32_t)dispatchSize[2]; + host_indirect[3] = axis->specializationConstants.inverse + axis->specializationConstants.performR2CmultiUpload * 2;// + axis->specializationConstants.axis_upload_id * 10 + axis->specializationConstants.axis_id; + indirect_offset = app->configuration.indirectBufferOffset + 16*app->indirectDispatchID; + vkCmdDispatchIndirect(app->configuration.commandBuffer[0], app->configuration.indirectBuffer, indirect_offset); + app->indirectDispatchID++; + } + else { + vkCmdDispatch(app->configuration.commandBuffer[0], (uint32_t)dispatchSize[0], (uint32_t)dispatchSize[1], (uint32_t)dispatchSize[2]); + } #elif(VKFFT_BACKEND==1) void* args[10]; CUresult result = CUDA_SUCCESS; diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index af47631..8f14ad6 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -152,6 +152,8 @@ typedef struct { VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled VkBuffer currentBatchUBO; //uniform buffer that contains the number of batches to perform when dynamicBatch >= 1. + VkBuffer indirectBuffer; + unsigned int* indirectHostPointer; #elif(VKFFT_BACKEND==1) void** buffer;//pointer to device buffer used for computations @@ -190,6 +192,7 @@ typedef struct { pfUINT outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0 pfUINT kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0 pfUINT currentBatchUBOOffset; //byte offset of the number of batches in the within the currentBatchUBO + pfUINT indirectBufferOffset; pfUINT specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0 //optional: (default 0 if not stated otherwise) @@ -326,6 +329,7 @@ typedef struct { MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution #endif pfUINT dynamicBatch; //set to 1 to dynamically limit the number of batches using the currentBatchUBO buffer. set to 2 for different numbers for fwd and inv. + pfUINT indirectDispatch; //0 for direct, 1 for fwd indirect, 2 for inv indirec, 3 for both } VkFFTConfiguration;//parameters specified at plan creation typedef struct { @@ -1185,7 +1189,7 @@ typedef struct { pfUINT bufferBluesteinSize[VKFFT_MAX_FFT_DIMENSIONS]; void* applicationBluesteinString[VKFFT_MAX_FFT_DIMENSIONS]; pfUINT applicationBluesteinStringSize[VKFFT_MAX_FFT_DIMENSIONS]; - + pfUINT indirectDispatchID; pfUINT numRaderFFTPrimes; pfUINT rader_primes[30]; pfUINT rader_buffer_size[30]; From cfbbd2fb256476451bb738d0bab6ec3ce7df279f Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sun, 23 Feb 2025 15:19:47 +0100 Subject: [PATCH 10/18] remove comment --- .../vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h | 1 - 1 file changed, 1 deletion(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index f4b81ab..a44357f 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -170,7 +170,6 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a } bool indirect_dispatch = (app->configuration.indirectDispatch && app->configuration.indirectHostPointer != nullptr); pfUINT indirect_offset; - //auto sc = axis->specializationConstants.inverse; if (indirect_dispatch){ unsigned int* host_indirect = (unsigned int*)((char*)app->configuration.indirectHostPointer + app->configuration.indirectBufferOffset + app->indirectDispatchID*16); host_indirect[0] = (uint32_t)dispatchSize[0]; From 3c0f9abf61c9697119fb58ee1e226e21114191b8 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sun, 23 Feb 2025 17:52:19 +0100 Subject: [PATCH 11/18] add debug info --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 3 + .../vkFFT_API_handles/vkFFT_CompileKernel.h | 41 ++++++++++++ .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 64 +++++++++++++++++-- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 3 + 4 files changed, 107 insertions(+), 4 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index c91b120..180387e 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -973,6 +973,9 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset; app->configuration.indirectHostPointer = inputLaunchConfiguration.indirectHostPointer; } + if (inputLaunchConfiguration.debugName != nullptr) app->configuration.debugName = inputLaunchConfiguration.debugName; + app->debugKernelCounter = 0; + app->debugUpdateCounter = 0; resFFT = initializeBluesteinAutoPadding(app); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 276ad34..ff27dc7 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -178,6 +178,47 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* GLSLANG_MSG_DEFAULT_BIT, (const glslang_resource_t*)&default_resource, }; + + + //DvdB + + const char* dname = app->configuration.debugName; + ofstream myfile; + std::string fname = ""; + + fname += dname ; + fname += "_kernel_"; + fname += std::to_string(app->debugKernelCounter) +".comp"; + + + + + // if (fname=="FFT1_kernel_3.txt"){ + // ifstream file("FFT1_kernel_3b.txt", ios::in | ios::binary | ios::ate); + + // unsigned int fileSize = file.tellg(); + // file.seekg(0, ios::beg); + + // char* buffer = new char[fileSize + 1]; + // file.read(buffer, fileSize); + // buffer[fileSize] = '\0'; + + // file.close(); + + // code0 = buffer; + + // } + + myfile.open(fname); + //app->configuration.dirkName = "DIRKDIRK"; + //myfile << app->configuration.dirkName; + //myfile << "// kernel type: " << app->configuration.dirkTypeFFT <debugKernelCounter++; + + //printf("%s\n", code0); glslang_shader_t* shader = glslang_shader_create((const glslang_input_t*)&input); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index e607baa..422e1af 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -235,7 +235,6 @@ static inline VkFFTResult VkFFTConfigureDescriptors(VkFFTApplication* app, VkFFT axis->specializationConstants.numBuffersBound[1] = (int)axis->specializationConstants.outputBufferBlockNum; axis->specializationConstants.numBuffersBound[2] = 0; axis->specializationConstants.numBuffersBound[3] = 0; - axis->specializationConstants.numBuffersBound[4] = 0; #if(VKFFT_BACKEND==0) VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; @@ -571,7 +570,6 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V axis->specializationConstants.numBuffersBound[1] = (int)axis->specializationConstants.outputBufferBlockNum; axis->specializationConstants.numBuffersBound[2] = 0; axis->specializationConstants.numBuffersBound[3] = 0; - axis->specializationConstants.numBuffersBound[4] = 0; #if(VKFFT_BACKEND==0) @@ -806,6 +804,18 @@ static inline VkFFTResult VkFFTCheckUpdateBufferSet(VkFFTApplication* app, VkFFT return VKFFT_SUCCESS; } static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { + + + + const char* dname = app->configuration.debugName; + ofstream myfile; + std::string fname = ""; + fname += dname ; + fname += "_buffer_"; + fname += std::to_string(app->debugUpdateCounter) + ".txt"; + myfile.open(fname); + myfile << "FFT" <specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { axis->specializationConstants.inputOffset.type = 31; axis->specializationConstants.outputOffset.type = 31; @@ -845,6 +855,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.inputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); + myfile << "i = 0, IN0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -874,6 +885,8 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); + myfile << "i = 0, OUT(IN) - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -957,8 +970,11 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } #if(VKFFT_BACKEND==0) if (axis->specializationConstants.performBufferSetUpdate) { - descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); + //descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); + descriptorBufferInfo.range = app->configuration.bufferSize[0]; descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); + myfile << "i = 0, IN1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1136,8 +1153,11 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } #if(VKFFT_BACKEND==0) if (axis->specializationConstants.performBufferSetUpdate) { - descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); + //descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); + descriptorBufferInfo.range = app->configuration.bufferSize[0]; descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { axis->specializationConstants.performOffsetUpdate = 0; } + myfile.close(); + app->debugUpdateCounter++; return VKFFT_SUCCESS; } static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { + + + const char* dname = app->configuration.debugName; + ofstream myfile; + std::string fname = ""; + fname += dname ; + fname += "_buffer_"; + fname += std::to_string(app->debugUpdateCounter) + ".txt"; + myfile.open(fname); + myfile << "R2C" <specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { #if(VKFFT_BACKEND==0) VkDescriptorType descriptorType; @@ -1284,6 +1318,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.inputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); + myfile << "i = 0, IN100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1312,6 +1348,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); + myfile << "i = 0, OUT(IN)100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1339,6 +1377,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); + myfile << "i = 0, IN101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1381,6 +1421,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 0, OUT100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1408,6 +1450,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 0, OUT101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1440,6 +1484,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT102 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1468,6 +1514,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.tempBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT103 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1493,6 +1541,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT104 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1535,6 +1585,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT105 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1562,6 +1614,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); + myfile << "i = 1, OUT106 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1639,6 +1693,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA if (axis->specializationConstants.performOffsetUpdate) { axis->specializationConstants.performOffsetUpdate = 0; } + myfile.close(); + app->debugUpdateCounter++; return VKFFT_SUCCESS; } diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index 8f14ad6..1169440 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -330,6 +330,7 @@ typedef struct { #endif pfUINT dynamicBatch; //set to 1 to dynamically limit the number of batches using the currentBatchUBO buffer. set to 2 for different numbers for fwd and inv. pfUINT indirectDispatch; //0 for direct, 1 for fwd indirect, 2 for inv indirec, 3 for both + const char* debugName; } VkFFTConfiguration;//parameters specified at plan creation typedef struct { @@ -1200,6 +1201,8 @@ typedef struct { pfUINT applicationStringSize;//size of saveApplicationString in bytes void* saveApplicationString;//memory array(uint32_t* for Vulkan, char* for CUDA/HIP/OpenCL) through which user can access VkFFT generated binaries. (will be allocated by VkFFT, deallocated with deleteVkFFT call) + pfUINT debugKernelCounter; + pfUINT debugUpdateCounter; } VkFFTApplication; #endif From 61050087c00f5ea3292ef09daa6f806eaea34304 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Tue, 4 Mar 2025 21:08:45 +0100 Subject: [PATCH 12/18] optional debug flag --- .../vkFFT_API_handles/vkFFT_CompileKernel.h | 52 +++++-------------- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 1 + 2 files changed, 15 insertions(+), 38 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index ff27dc7..733b389 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -178,46 +178,22 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* GLSLANG_MSG_DEFAULT_BIT, (const glslang_resource_t*)&default_resource, }; + + if (app->configuration.enableDebug){ + const char* dname = app->configuration.debugName; + ofstream myfile; + std::string fname = ""; + fname += dname ; + fname += "_kernel_"; + fname += std::to_string(app->debugKernelCounter) +".comp"; - //DvdB - - const char* dname = app->configuration.debugName; - ofstream myfile; - std::string fname = ""; - - fname += dname ; - fname += "_kernel_"; - fname += std::to_string(app->debugKernelCounter) +".comp"; - - - - - // if (fname=="FFT1_kernel_3.txt"){ - // ifstream file("FFT1_kernel_3b.txt", ios::in | ios::binary | ios::ate); - - // unsigned int fileSize = file.tellg(); - // file.seekg(0, ios::beg); - - // char* buffer = new char[fileSize + 1]; - // file.read(buffer, fileSize); - // buffer[fileSize] = '\0'; - - // file.close(); - - // code0 = buffer; - - // } - - myfile.open(fname); - //app->configuration.dirkName = "DIRKDIRK"; - //myfile << app->configuration.dirkName; - //myfile << "// kernel type: " << app->configuration.dirkTypeFFT <debugKernelCounter++; - + myfile.open(fname); + myfile << code0; + + myfile.close(); + app->debugKernelCounter++; + } //printf("%s\n", code0); diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index 1169440..ebc01d5 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -331,6 +331,7 @@ typedef struct { pfUINT dynamicBatch; //set to 1 to dynamically limit the number of batches using the currentBatchUBO buffer. set to 2 for different numbers for fwd and inv. pfUINT indirectDispatch; //0 for direct, 1 for fwd indirect, 2 for inv indirec, 3 for both const char* debugName; + pfUINT enableDebug; } VkFFTConfiguration;//parameters specified at plan creation typedef struct { From 661b7c53a27c83fe2abdbcae3fc876c8ee4aea39 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Wed, 5 Mar 2025 22:17:03 +0100 Subject: [PATCH 13/18] correct workgroup id for indirect dispatch --- .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 5 +- .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 93 +++++++++++-------- .../vkFFT_Plans/vkFFT_Plan_FFT.h | 2 +- .../vkFFT_Plans/vkFFT_Plan_R2C.h | 1 + vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 1 + 5 files changed, 63 insertions(+), 39 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index a44357f..1461b23 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -175,7 +175,10 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a host_indirect[0] = (uint32_t)dispatchSize[0]; host_indirect[1] = (uint32_t)dispatchSize[1]; host_indirect[2] = (uint32_t)dispatchSize[2]; - host_indirect[3] = axis->specializationConstants.inverse + axis->specializationConstants.performR2CmultiUpload * 2;// + axis->specializationConstants.axis_upload_id * 10 + axis->specializationConstants.axis_id; + host_indirect[3] = axis->batchWorkGroup; +; + //host_indirect[3] = 1000*axis->specializationConstants.performR2C + axis->specializationConstants.performR2CmultiUpload; + //host_indirect[3] = axis->specializationConstants.inverse + axis->specializationConstants.performR2CmultiUpload * 2;// + axis->specializationConstants.axis_upload_id * 10 + axis->specializationConstants.axis_id; indirect_offset = app->configuration.indirectBufferOffset + 16*app->indirectDispatchID; vkCmdDispatchIndirect(app->configuration.commandBuffer[0], app->configuration.indirectBuffer, indirect_offset); app->indirectDispatchID++; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index 422e1af..f387b99 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -805,17 +805,16 @@ static inline VkFFTResult VkFFTCheckUpdateBufferSet(VkFFTApplication* app, VkFFT } static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { - - - const char* dname = app->configuration.debugName; ofstream myfile; - std::string fname = ""; - fname += dname ; - fname += "_buffer_"; - fname += std::to_string(app->debugUpdateCounter) + ".txt"; - myfile.open(fname); - myfile << "FFT" <configuration.enableDebug){ + const char* dname = app->configuration.debugName; + std::string fname = ""; + fname += dname ; + fname += "_buffer_"; + fname += std::to_string(app->debugUpdateCounter) + ".txt"; + myfile.open(fname); + myfile << "FFT" <specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { axis->specializationConstants.inputOffset.type = 31; axis->specializationConstants.outputOffset.type = 31; @@ -855,7 +854,9 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.inputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - myfile << "i = 0, IN0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, IN0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -885,7 +886,9 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - myfile << "i = 0, OUT(IN) - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, OUT(IN) - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.inputBufferBlockSize); descriptorBufferInfo.range = app->configuration.bufferSize[0]; descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - myfile << "i = 0, IN1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, IN1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1156,7 +1161,8 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* //descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.range = app->configuration.bufferSize[0]; descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { axis->specializationConstants.performOffsetUpdate = 0; } - myfile.close(); + if (app->configuration.enableDebug){ + myfile.close();} app->debugUpdateCounter++; return VKFFT_SUCCESS; } static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { - - const char* dname = app->configuration.debugName; - ofstream myfile; - std::string fname = ""; - fname += dname ; - fname += "_buffer_"; - fname += std::to_string(app->debugUpdateCounter) + ".txt"; - myfile.open(fname); - myfile << "R2C" <configuration.enableDebug){ + const char* dname = app->configuration.debugName; + std::string fname = ""; + fname += dname ; + fname += "_buffer_"; + fname += std::to_string(app->debugUpdateCounter) + ".txt"; + myfile.open(fname); + myfile << "R2C" <specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { #if(VKFFT_BACKEND==0) @@ -1318,7 +1325,8 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.inputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - myfile << "i = 0, IN100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, IN100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - myfile << "i = 0, OUT(IN)100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, OUT(IN)100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - myfile << "i = 0, IN101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, IN101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 0, OUT100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, OUT100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 0, OUT101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 0, OUT101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT102 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT102 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.tempBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT103 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT103 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT104 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT104 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT105 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT105 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - myfile << "i = 1, OUT106 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <configuration.enableDebug){ + myfile << "i = 1, OUT106 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { axis->specializationConstants.performOffsetUpdate = 0; } - myfile.close(); + if (app->configuration.enableDebug){ + myfile.close(); + } app->debugUpdateCounter++; return VKFFT_SUCCESS; } diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h index c4e7bad..5b767ba 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h @@ -46,7 +46,7 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla #elif(VKFFT_BACKEND==5) #endif VkFFTAxis* axis = (reverseBluesteinMultiUpload) ? &FFTPlan->inverseBluesteinAxes[axis_id][axis_upload_id] : &FFTPlan->axes[axis_id][axis_upload_id]; - + axis->batchWorkGroup = 1; axis->specializationConstants.sourceFFTSize.type = 31; axis->specializationConstants.sourceFFTSize.data.i = app->configuration.size[axis_id]; axis->specializationConstants.axis_id = (int)axis_id; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h index c59be43..89e905e 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h @@ -42,6 +42,7 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* #elif(VKFFT_BACKEND==5) #endif VkFFTAxis* axis = &FFTPlan->R2Cdecomposition; + axis->batchWorkGroup = 2; axis->specializationConstants.sourceFFTSize.type = 31; axis->specializationConstants.sourceFFTSize.data.i = (pfINT)app->configuration.size[0]; axis->specializationConstants.numFFTdims = (int)app->configuration.FFTdim; diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index ebc01d5..cae5402 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -1127,6 +1127,7 @@ typedef struct { pfUINT bufferLUTSize; pfUINT bufferRaderUintLUTSize; pfUINT referenceLUT; + pfUINT batchWorkGroup; } VkFFTAxis; typedef struct { From 1491ee5af140f5e5e59e56c840460a9c63c140d0 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sat, 29 Mar 2025 17:38:24 +0100 Subject: [PATCH 14/18] remove debug lines restore some whitespaces Update vkFFT_UpdateBuffers.h remove debug lines remove currentBatch/dynamicBatch --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 11 +- .../vkFFT_KernelStartEnd.h | 33 ----- .../vkFFT_KernelsLevel0/vkFFT_KernelUtils.h | 6 - .../vkFFT_InputOutputLayout.h | 30 ---- .../vkFFT_PushConstants.h | 5 - .../vkFFT_KernelsLevel2/vkFFT_FFT.h | 4 - .../vkFFT_R2C_even_decomposition.h | 4 - .../vkFFT_API_handles/vkFFT_CompileKernel.h | 49 ++----- .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 17 +-- .../vkFFT_InitAPIParameters.h | 8 - .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 138 +----------------- .../vkFFT_Plans/vkFFT_Plan_FFT.h | 11 -- .../vkFFT_Plans/vkFFT_Plan_R2C.h | 9 -- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 14 +- 14 files changed, 20 insertions(+), 319 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 180387e..22f0eee 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -911,7 +911,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf return VKFFT_ERROR_INVALID_QUEUE; } app->configuration.queue = inputLaunchConfiguration.queue; - + const char dummy_kernel[50] = "kernel void VkFFT_dummy (){}"; const char function_name[20] = "VkFFT_dummy"; @@ -966,16 +966,12 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf compileOptions->release(); #endif - if (inputLaunchConfiguration.dynamicBatch != 0) app->configuration.dynamicBatch = inputLaunchConfiguration.dynamicBatch; if (inputLaunchConfiguration.indirectDispatch != 0) { app->configuration.indirectDispatch = inputLaunchConfiguration.indirectDispatch; app->configuration.indirectBuffer = inputLaunchConfiguration.indirectBuffer; app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset; app->configuration.indirectHostPointer = inputLaunchConfiguration.indirectHostPointer; } - if (inputLaunchConfiguration.debugName != nullptr) app->configuration.debugName = inputLaunchConfiguration.debugName; - app->debugKernelCounter = 0; - app->debugUpdateCounter = 0; resFFT = initializeBluesteinAutoPadding(app); if (resFFT != VKFFT_SUCCESS) { deleteVkFFT(app); @@ -1176,11 +1172,6 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf } app->configuration.kernel = inputLaunchConfiguration.kernel; } - if (inputLaunchConfiguration.currentBatchUBOSize !=0){ - app->configuration.currentBatchUBOSize = inputLaunchConfiguration.currentBatchUBOSize; - app->configuration.currentBatchUBO = inputLaunchConfiguration.currentBatchUBO; - app->configuration.currentBatchUBOOffset = inputLaunchConfiguration.currentBatchUBOOffset; - } if (inputLaunchConfiguration.bufferOffset != 0) app->configuration.bufferOffset = inputLaunchConfiguration.bufferOffset; if (inputLaunchConfiguration.tempBufferOffset != 0) app->configuration.tempBufferOffset = inputLaunchConfiguration.tempBufferOffset; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h index 0ce7210..8ba77ac 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelStartEnd.h @@ -46,23 +46,6 @@ static inline void appendKernelStart(VkFFTSpecializationConstantsLayout* sc, int appendSharedMemoryVkFFT(sc, (int)locType); sc->tempLen = sprintf(sc->tempStr, "void main() {\n"); PfAppendLine(sc); - - if (sc->dynamicBatch == 1){ - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.N) return;\n"); - PfAppendLine(sc); - } - else if (sc->dynamicBatch == 2) - { - if (sc->inverse == 1){ - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.Ninv) return;\n"); - PfAppendLine(sc); - } - else - { - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.y >= currentBatch.Nfwd) return;\n"); - PfAppendLine(sc); - } - } #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern __shared__ float shared[];\n"); PfAppendLine(sc); @@ -291,22 +274,6 @@ static inline void appendKernelStart_R2C(VkFFTSpecializationConstantsLayout* sc, sc->tempLen = sprintf(sc->tempStr, "void main() {\n"); PfAppendLine(sc); - if (sc->dynamicBatch == 1){ - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.N) return;\n"); - PfAppendLine(sc); - } - else if (sc->dynamicBatch == 2) - { - if (sc->inverse == 1){ - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.Ninv) return;\n"); - PfAppendLine(sc); - } - else - { - sc->tempLen = sprintf(sc->tempStr, "if (gl_WorkGroupID.z >= currentBatch.Nfwd) return;\n"); - PfAppendLine(sc); - } - } #elif(VKFFT_BACKEND==1) sc->tempLen = sprintf(sc->tempStr, "extern \"C\" __global__ void __launch_bounds__(%" PRIi64 ") VkFFT_main_R2C ", sc->localSize[0].data.i * sc->localSize[1].data.i * sc->localSize[2].data.i); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h index e0cb4f6..49aaba2 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_KernelUtils.h @@ -61,8 +61,6 @@ static inline void appendVersion(VkFFTSpecializationConstantsLayout* sc) { #endif return; } - - static inline void appendExtensions(VkFFTSpecializationConstantsLayout* sc) { if (sc->res != VKFFT_SUCCESS) return; #if(VKFFT_BACKEND==0) @@ -80,10 +78,6 @@ static inline void appendExtensions(VkFFTSpecializationConstantsLayout* sc) { sc->tempLen = sprintf(sc->tempStr, "#extension GL_EXT_shader_16bit_storage : require\n\n"); PfAppendLine(sc); } - if (sc->dynamicBatch) { //Allow for std430 uniform blocks - sc->tempLen = sprintf(sc->tempStr, "#extension GL_EXT_scalar_block_layout : enable\n\n"); - PfAppendLine(sc); - } #elif(VKFFT_BACKEND==1) if ((((sc->floatTypeInputMemoryCode / 10) % 10) == 0) || (((sc->floatTypeOutputMemoryCode / 10) % 10) == 0) || (((sc->floatTypeCode / 10) % 10) == 0)) { sc->tempLen = sprintf(sc->tempStr, "\ diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h index 7517dff..620c137 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_InputOutputLayout.h @@ -92,36 +92,6 @@ layout(std430, binding = %d) buffer DataOut{\n\ #endif return; } - -static inline void appendCurrentBatchVkFFT(VkFFTSpecializationConstantsLayout* sc, int id) { - if (sc->res != VKFFT_SUCCESS) return; - PfContainer* uintType32; - PfGetTypeFromCode(sc, sc->uintType32Code, &uintType32); - -#if(VKFFT_BACKEND==0) - if (sc->dynamicBatch==1){ - sc->tempLen = sprintf(sc->tempStr, "\ -layout(std430, binding = %d) uniform UniformBufferObject{\n\ - %s N;\n\ - } currentBatch;\n\n", id, uintType32->name); - } - else if (sc->dynamicBatch==2){ - sc->tempLen = sprintf(sc->tempStr, "\ -layout(std430, binding = %d) uniform UniformBufferObject{\n\ - %s Nfwd;\n\ - %s Ninv;\n\ - } currentBatch;\n\n", id, uintType32->name, uintType32->name); - } - PfAppendLine(sc); - -#elif(VKFFT_BACKEND==1) -#elif(VKFFT_BACKEND==2) -#elif((VKFFT_BACKEND==3)||(VKFFT_BACKEND==4)) -#elif(VKFFT_BACKEND==5) -#endif - return; -} - static inline void appendKernelLayoutVkFFT(VkFFTSpecializationConstantsLayout* sc, int id) { if (sc->res != VKFFT_SUCCESS) return; PfContainer* vecType; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h index bbfa0a0..12aa811 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel0/vkFFT_MemoryManagement/vkFFT_MemoryInitialization/vkFFT_PushConstants.h @@ -90,11 +90,6 @@ static inline void appendPushConstants(VkFFTSpecializationConstantsLayout* sc) { sprintf(tempCopyStr, "consts.%s", sc->kernelOffset.name); sprintf(sc->kernelOffset.name, "%s", tempCopyStr); } - // if (sc->performPostCompilationCurrentBatch) { - // appendPushConstant(sc, &sc->currentBatch); - // sprintf(tempCopyStr, "consts.%s", sc->currentBatch.name); - // sprintf(sc->currentBatch.name, "%s", tempCopyStr); - // } #if(VKFFT_BACKEND==0) sc->tempLen = sprintf(sc->tempStr, "} consts;\n\n"); PfAppendLine(sc); diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h index aff5209..5d59157 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_FFT.h @@ -73,10 +73,6 @@ static inline VkFFTResult shaderGen_FFT(VkFFTSpecializationConstantsLayout* sc, id++; appendOutputLayoutVkFFT(sc, id); id++; - if (sc->dynamicBatch){ - appendCurrentBatchVkFFT(sc, id); - id++; - } if (sc->convolutionStep) { appendKernelLayoutVkFFT(sc, id); id++; diff --git a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h index b7af56f..6319a39 100644 --- a/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h +++ b/vkFFT/vkFFT/vkFFT_CodeGen/vkFFT_KernelsLevel2/vkFFT_R2C_even_decomposition.h @@ -64,10 +64,6 @@ static inline VkFFTResult shaderGen_R2C_even_decomposition(VkFFTSpecializationCo id++; appendOutputLayoutVkFFT(sc, id); id++; - if (sc->dynamicBatch){ - appendCurrentBatchVkFFT(sc, id); - id++; - } if (sc->LUT) { appendLUTLayoutVkFFT(sc, id); id++; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h index 733b389..19c3920 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_CompileKernel.h @@ -22,7 +22,6 @@ #ifndef VKFFT_COMPILEKERNEL_H #define VKFFT_COMPILEKERNEL_H #include "vkFFT/vkFFT_Structs/vkFFT_Structs.h" -using namespace std; static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* axis) { #if(VKFFT_BACKEND==0) @@ -163,39 +162,21 @@ static inline VkFFTResult VkFFT_CompileKernel(VkFFTApplication* app, VkFFTAxis* glslang_target_client_version_t client_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_VULKAN_1_1 : GLSLANG_TARGET_VULKAN_1_0; glslang_target_language_version_t target_language_version = (app->configuration.halfPrecision) ? GLSLANG_TARGET_SPV_1_3 : GLSLANG_TARGET_SPV_1_0; glslang_input_t input = - { - GLSLANG_SOURCE_GLSL, - GLSLANG_STAGE_COMPUTE, - GLSLANG_CLIENT_VULKAN, - client_version, - GLSLANG_TARGET_SPV, - target_language_version, - code0, - 450, - GLSLANG_NO_PROFILE, - 1, - 0, - GLSLANG_MSG_DEFAULT_BIT, - (const glslang_resource_t*)&default_resource, - }; - - if (app->configuration.enableDebug){ - const char* dname = app->configuration.debugName; - ofstream myfile; - std::string fname = ""; - - fname += dname ; - fname += "_kernel_"; - fname += std::to_string(app->debugKernelCounter) +".comp"; - - myfile.open(fname); - myfile << code0; - - myfile.close(); - app->debugKernelCounter++; - } - - + { + GLSLANG_SOURCE_GLSL, + GLSLANG_STAGE_COMPUTE, + GLSLANG_CLIENT_VULKAN, + client_version, + GLSLANG_TARGET_SPV, + target_language_version, + code0, + 450, + GLSLANG_NO_PROFILE, + 1, + 0, + GLSLANG_MSG_DEFAULT_BIT, + (const glslang_resource_t*)&default_resource, + }; //printf("%s\n", code0); glslang_shader_t* shader = glslang_shader_create((const glslang_input_t*)&input); const char* err; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index 1461b23..091e5a1 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -25,10 +25,6 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* axis, pfUINT* dispatchBlock) { VkFFTResult resFFT = VKFFT_SUCCESS; - - - - if (axis->specializationConstants.swapComputeWorkGroupID == 1) { pfUINT temp = dispatchBlock[0]; dispatchBlock[0] = dispatchBlock[1]; @@ -58,11 +54,10 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a if (app->configuration.specifyOffsetsAtLaunch) { axis->updatePushConstants = 1; } - //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", dispatchBlock[0], dispatchBlock[1], dispatchBlock[2]); //printf("%" PRIu64 " %" PRIu64 " %" PRIu64 "\n", blockNumber[0], blockNumber[1], blockNumber[2]); for (pfUINT i = 0; i < 3; i++) - if (blockNumber[i] == 1) blockSize[i] = dispatchBlock[i]; + if (blockNumber[i] == 1) blockSize[i] = dispatchBlock[i]; for (pfUINT i = 0; i < blockNumber[0]; i++) { for (pfUINT j = 0; j < blockNumber[1]; j++) { for (pfUINT k = 0; k < blockNumber[2]; k++) { @@ -112,11 +107,6 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); offset += sizeof(pfUINT); } - // if (axis->specializationConstants.performPostCompilationCurrentBatch) { - // temp = axis->specializationConstants.currentBatch.data.i; - // memcpy(&axis->pushConstants.data[offset], &temp, sizeof(pfUINT)); - // offset += sizeof(pfUINT); - // } } else { pfUINT offset = 0; @@ -154,11 +144,6 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); offset += sizeof(uint32_t); } - // if (axis->specializationConstants.performPostCompilationCurrentBatch) { - // temp = (uint32_t)(axis->specializationConstants.currentBatch.data.i); - // memcpy(&axis->pushConstants.data[offset], &temp, sizeof(uint32_t)); - // offset += sizeof(uint32_t); - // } } } dispatchSize[0] = (i == blockNumber[0] - 1) ? lastBlockSize[0] : blockSize[0]; diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h index d5d92c4..da4fa29 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_InitAPIParameters.h @@ -389,11 +389,6 @@ static inline VkFFTResult initParametersAPI(VkFFTApplication* app, VkFFTSpeciali PfAllocateContainerFlexible(sc, &sc->kernelOffset, 50); sprintf(sc->kernelOffset.name, "kernelOffset"); } - // if (sc->performPostCompilationCurrentBatch) { - // sc->currentBatch.type = 100 + sc->uintTypeCode; - // PfAllocateContainerFlexible(sc, &sc->currentBatch, 50); - // sprintf(sc->currentBatch.name, "currentBatch"); - // } #if(VKFFT_BACKEND==0) sprintf(sc->inputsStruct.name, "inputs"); sprintf(sc->outputsStruct.name, "outputs"); @@ -635,9 +630,6 @@ static inline VkFFTResult freeParametersAPI(VkFFTApplication* app, VkFFTSpeciali if (sc->performPostCompilationKernelOffset) { PfDeallocateContainer(sc, &sc->kernelOffset); } - // if (sc->performPostCompilationCurrentBatch) { - // PfDeallocateContainer(sc, &sc->currentBatch); - // } return res; } diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index f387b99..07f4755 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -235,23 +235,10 @@ static inline VkFFTResult VkFFTConfigureDescriptors(VkFFTApplication* app, VkFFT axis->specializationConstants.numBuffersBound[1] = (int)axis->specializationConstants.outputBufferBlockNum; axis->specializationConstants.numBuffersBound[2] = 0; axis->specializationConstants.numBuffersBound[3] = 0; - #if(VKFFT_BACKEND==0) VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; descriptorPoolSize.descriptorCount = (uint32_t)(axis->specializationConstants.inputBufferBlockNum + axis->specializationConstants.outputBufferBlockNum); #endif - - - if (app->configuration.dynamicBatch >= 1) { - axis->specializationConstants.currentBatchBindingID = (int)axis->numBindings; - axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; -#if(VKFFT_BACKEND==0) - descriptorPoolSize.descriptorCount++; -#endif - axis->numBindings++; - } - - axis->specializationConstants.convolutionBindingID = -1; if ((axis_id == (app->configuration.FFTdim-1)) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionBindingID = (int)axis->numBindings; @@ -571,12 +558,10 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V axis->specializationConstants.numBuffersBound[2] = 0; axis->specializationConstants.numBuffersBound[3] = 0; - #if(VKFFT_BACKEND==0) VkDescriptorPoolSize descriptorPoolSize = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; descriptorPoolSize.descriptorCount = (uint32_t)(axis->specializationConstants.numBuffersBound[0] + axis->specializationConstants.numBuffersBound[1]); #endif - if ((axis_id == (app->configuration.FFTdim-1)) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionBindingID = (int)axis->numBindings; axis->specializationConstants.numBuffersBound[axis->numBindings] = (int)axis->specializationConstants.kernelBlockNum; @@ -585,15 +570,6 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V #endif axis->numBindings++; } - - if (app->configuration.dynamicBatch >= 1) { - axis->specializationConstants.currentBatchBindingID = (int)axis->numBindings; - axis->specializationConstants.numBuffersBound[axis->numBindings] = 1; -#if(VKFFT_BACKEND==0) - descriptorPoolSize.descriptorCount++; -#endif - axis->numBindings++; - } if (app->configuration.useLUT == 1) { axis->specializationConstants.LUTBindingID = (int)axis->numBindings; @@ -603,10 +579,6 @@ static inline VkFFTResult VkFFTConfigureDescriptorsR2CMultiUploadDecomposition(V #endif axis->numBindings++; } - - - - #if(VKFFT_BACKEND==0) VkResult res = VK_SUCCESS; VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = { VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO }; @@ -805,22 +777,12 @@ static inline VkFFTResult VkFFTCheckUpdateBufferSet(VkFFTApplication* app, VkFFT } static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { - ofstream myfile; - if (app->configuration.enableDebug){ - const char* dname = app->configuration.debugName; - std::string fname = ""; - fname += dname ; - fname += "_buffer_"; - fname += std::to_string(app->debugUpdateCounter) + ".txt"; - myfile.open(fname); - myfile << "FFT" <specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { axis->specializationConstants.inputOffset.type = 31; axis->specializationConstants.outputOffset.type = 31; axis->specializationConstants.kernelOffset.type = 31; #if(VKFFT_BACKEND==0) - VkDescriptorType descriptorType; + const VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; #endif for (pfUINT i = 0; i < axis->numBindings; ++i) { for (pfUINT j = 0; j < axis->specializationConstants.numBuffersBound[i]; ++j) { @@ -828,7 +790,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* VkDescriptorBufferInfo descriptorBufferInfo = { 0 }; #endif if (i == 0) { - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if ((axis_upload_id == FFTPlan->numAxisUploads[axis_id] - 1) && (app->configuration.isInputFormatted) && (!axis->specializationConstants.reverseBluesteinMultiUpload) && ( ((axis_id == app->firstAxis) && (!inverse)) || ((axis_id == app->lastAxis) && (inverse) && (!((axis_id == 0) && (axis->specializationConstants.performR2CmultiUpload))) && (!app->configuration.performConvolution) && (!app->configuration.inverseReturnToInputBuffer))) @@ -854,9 +815,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.inputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, IN0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -886,10 +844,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, OUT(IN) - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -976,9 +930,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* //descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.range = app->configuration.bufferSize[0]; descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, IN1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <useBluesteinFFT[axis_id]) && (app->configuration.isOutputFormatted && ( ((axis_id == app->firstAxis) && (inverse)) || ((axis_id == app->lastAxis) && (!inverse) && (!app->configuration.performConvolution)) @@ -1020,8 +970,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT0 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1161,27 +1109,12 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* //descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.range = app->configuration.bufferSize[0]; descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT1 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.currentBatchBindingID) && (app->configuration.dynamicBatch)){ -#if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - - if (axis->specializationConstants.performBufferSetUpdate) { - descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; - descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; - descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; - } -#endif - } if ((i == axis->specializationConstants.convolutionBindingID) && (app->configuration.performConvolution)) { - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { pfUINT bufferId = 0; pfUINT offset = j; @@ -1209,7 +1142,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT == 1)) { #if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = axis->bufferLUT; descriptorBufferInfo.offset = 0; @@ -1219,7 +1151,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.RaderUintLUTBindingID) && (axis->specializationConstants.raderUintLUT)) { #if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = axis->bufferRaderUintLUT; descriptorBufferInfo.offset = 0; @@ -1229,7 +1160,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.BluesteinConvolutionBindingID) && (app->useBluesteinFFT[axis_id]) && (axis_upload_id == 0)) { #if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { if (axis->specializationConstants.inverseBluestein) descriptorBufferInfo.buffer = app->bufferBluesteinIFFT[axis_id]; @@ -1242,7 +1172,6 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } if ((i == axis->specializationConstants.BluesteinMultiplicationBindingID) && (app->useBluesteinFFT[axis_id]) && (axis_upload_id == (FFTPlan->numAxisUploads[axis_id] - 1))) { #if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = app->bufferBluestein[axis_id]; descriptorBufferInfo.offset = 0; @@ -1271,27 +1200,13 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* if (axis->specializationConstants.performOffsetUpdate) { axis->specializationConstants.performOffsetUpdate = 0; } - if (app->configuration.enableDebug){ - myfile.close();} - app->debugUpdateCounter++; return VKFFT_SUCCESS; } static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTApplication* app, VkFFTPlan* FFTPlan, VkFFTAxis* axis, pfUINT axis_id, pfUINT axis_upload_id, pfUINT inverse) { - ofstream myfile; - if (app->configuration.enableDebug){ - const char* dname = app->configuration.debugName; - std::string fname = ""; - fname += dname ; - fname += "_buffer_"; - fname += std::to_string(app->debugUpdateCounter) + ".txt"; - myfile.open(fname); - myfile << "R2C" <specializationConstants.performOffsetUpdate || axis->specializationConstants.performBufferSetUpdate) { #if(VKFFT_BACKEND==0) - VkDescriptorType descriptorType; + const VkDescriptorType descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; #endif for (pfUINT i = 0; i < axis->numBindings; ++i) { for (pfUINT j = 0; j < axis->specializationConstants.numBuffersBound[i]; ++j) { @@ -1299,7 +1214,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA VkDescriptorBufferInfo descriptorBufferInfo = { 0 }; #endif if (i == 0) { - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (inverse) { if ((axis_upload_id == FFTPlan->numAxisUploads[axis_id] - 1) && (app->configuration.isInputFormatted) && (!axis->specializationConstants.reverseBluesteinMultiUpload) && ( ((axis_id == app->firstAxis) && (!inverse)) @@ -1325,9 +1239,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.inputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, IN100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1356,9 +1267,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, OUT(IN)100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1386,9 +1294,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, IN101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1431,9 +1336,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, OUT100 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1461,9 +1363,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 0, OUT101 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1473,7 +1372,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA } } if (i == 1) { - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (inverse) { if ((axis_upload_id == 0) && (app->configuration.numberKernels > 1) && (inverse) && (!app->configuration.performConvolution)) { if (axis->specializationConstants.performBufferSetUpdate) { @@ -1496,9 +1394,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT102 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1527,9 +1422,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.tempBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT103 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1555,9 +1447,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT104 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1600,9 +1489,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.outputBuffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT105 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1630,9 +1516,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.buffer[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); - if (app->configuration.enableDebug){ - myfile << "i = 1, OUT106 - Range: " << descriptorBufferInfo.range << " , Offset: " << descriptorBufferInfo.offset <specializationConstants.performOffsetUpdate) { @@ -1642,7 +1525,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA } } if ((i == axis->specializationConstants.convolutionBindingID) && (app->configuration.performConvolution)) { - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { pfUINT bufferId = 0; pfUINT offset = j; @@ -1662,26 +1544,14 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA descriptorBufferInfo.buffer = app->configuration.kernel[bufferId]; descriptorBufferInfo.range = (axis->specializationConstants.kernelBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.kernelBlockSize); - #endif } if (axis->specializationConstants.performOffsetUpdate) { axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; } } - if ((i == axis->specializationConstants.currentBatchBindingID) && (app->configuration.dynamicBatch)) { -#if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - if (axis->specializationConstants.performBufferSetUpdate) { - descriptorBufferInfo.buffer = app->configuration.currentBatchUBO; - descriptorBufferInfo.offset = app->configuration.currentBatchUBOOffset; - descriptorBufferInfo.range = app->configuration.currentBatchUBOSize; - } -#endif - } if ((i == axis->specializationConstants.LUTBindingID) && (app->configuration.useLUT == 1)) { #if(VKFFT_BACKEND==0) - descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; if (axis->specializationConstants.performBufferSetUpdate) { descriptorBufferInfo.buffer = axis->bufferLUT; descriptorBufferInfo.offset = 0; @@ -1710,10 +1580,6 @@ static inline VkFFTResult VkFFTUpdateBufferSetR2CMultiUploadDecomposition(VkFFTA if (axis->specializationConstants.performOffsetUpdate) { axis->specializationConstants.performOffsetUpdate = 0; } - if (app->configuration.enableDebug){ - myfile.close(); - } - app->debugUpdateCounter++; return VKFFT_SUCCESS; } diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h index 5b767ba..f61c308 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_FFT.h @@ -425,8 +425,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla if (app->configuration.specifyOffsetsAtLaunch) { axis->specializationConstants.performPostCompilationInputOffset = 1; axis->specializationConstants.performPostCompilationOutputOffset = 1; - //axis->specializationConstants.performPostCompilationCurrentBatch = 1; - if (app->configuration.performConvolution) axis->specializationConstants.performPostCompilationKernelOffset = 1; } @@ -437,8 +435,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->specializationConstants.outputOffset.data.i = app->configuration.outputBufferOffset; axis->specializationConstants.kernelOffset.type = 31; axis->specializationConstants.kernelOffset.data.i = app->configuration.kernelOffset; - //axis->specializationConstants.currentBatch.type = 31; - //axis->specializationConstants.currentBatch.data.i = app->configuration.numberBatches; } resFFT = VkFFTCheckUpdateBufferSet(app, axis, 1, 0); @@ -563,9 +559,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla else axis->specializationConstants.zeropad[1] = 0; } - if (app->configuration.dynamicBatch){ - axis->specializationConstants.dynamicBatch = app->configuration.dynamicBatch; - } if ((app->configuration.FFTdim - 1 == axis_id) && (axis_upload_id == 0) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionStep = 1; } @@ -679,10 +672,6 @@ static inline VkFFTResult VkFFTPlanAxis(VkFFTApplication* app, VkFFTPlan* FFTPla axis->pushConstants.performPostCompilationKernelOffset = 1; axis->pushConstants.structSize += 1; } - // if (axis->specializationConstants.performPostCompilationCurrentBatch) { - // axis->pushConstants.performPostCompilationCurrentBatch = 1; - // axis->pushConstants.structSize += 1; - // } if (app->configuration.useUint64) axis->pushConstants.structSize *= sizeof(pfUINT); else diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h index 89e905e..c060cc5 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_Plans/vkFFT_Plan_R2C.h @@ -138,8 +138,6 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* if (app->configuration.specifyOffsetsAtLaunch) { axis->specializationConstants.performPostCompilationInputOffset = 1; axis->specializationConstants.performPostCompilationOutputOffset = 1; - //axis->specializationConstants.performPostCompilationCurrentBatch = 1; - if (app->configuration.performConvolution) axis->specializationConstants.performPostCompilationKernelOffset = 1; } @@ -259,9 +257,6 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* else axis->specializationConstants.zeropad[1] = 0; }*/ - if (app->configuration.dynamicBatch){ - axis->specializationConstants.dynamicBatch = app->configuration.dynamicBatch; - } if ((app->configuration.FFTdim == 1) && (app->configuration.performConvolution)) { axis->specializationConstants.convolutionStep = 1; } @@ -295,10 +290,6 @@ static inline VkFFTResult VkFFTPlanR2CMultiUploadDecomposition(VkFFTApplication* axis->pushConstants.performPostCompilationKernelOffset = 1; axis->pushConstants.structSize += 1; } - // if (axis->specializationConstants.performPostCompilationCurrentBatch) { - // axis->pushConstants.performPostCompilationCurrentBatch = 1; - // axis->pushConstants.structSize += 1; - // } if (app->configuration.useUint64) axis->pushConstants.structSize *= sizeof(pfUINT); else diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index cae5402..eb0d468 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -143,18 +143,15 @@ typedef struct { pfUINT* inputBufferSize;//array of input buffers sizes in bytes, if isInputFormatted is enabled pfUINT* outputBufferSize;//array of output buffers sizes in bytes, if isOutputFormatted is enabled pfUINT* kernelSize;//array of kernel buffers sizes in bytes, if performConvolution is enabled - pfUINT currentBatchUBOSize; //size of the uniform buffer containing the number of batches to perform. Set to 4 if dynamicBatch=1 and set to 8 if dynamicBatch=2. - + #if(VKFFT_BACKEND==0) VkBuffer* buffer;//pointer to array of buffers (or one buffer) used for computations VkBuffer* tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same sum size or bigger as buffer (can be split in multiple). Default 0. Setting to non zero value enables manual user allocation VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled - VkBuffer currentBatchUBO; //uniform buffer that contains the number of batches to perform when dynamicBatch >= 1. VkBuffer indirectBuffer; unsigned int* indirectHostPointer; - #elif(VKFFT_BACKEND==1) void** buffer;//pointer to device buffer used for computations void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation @@ -191,7 +188,6 @@ typedef struct { pfUINT inputBufferOffset;//specify if VkFFT has to offset the first element position inside the input buffer. In bytes. Default 0 pfUINT outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0 pfUINT kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0 - pfUINT currentBatchUBOOffset; //byte offset of the number of batches in the within the currentBatchUBO pfUINT indirectBufferOffset; pfUINT specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0 @@ -328,10 +324,7 @@ typedef struct { MTL::CommandBuffer* commandBuffer;//Filled at app execution MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution #endif - pfUINT dynamicBatch; //set to 1 to dynamically limit the number of batches using the currentBatchUBO buffer. set to 2 for different numbers for fwd and inv. pfUINT indirectDispatch; //0 for direct, 1 for fwd indirect, 2 for inv indirec, 3 for both - const char* debugName; - pfUINT enableDebug; } VkFFTConfiguration;//parameters specified at plan creation typedef struct { @@ -826,7 +819,6 @@ typedef struct { int swapComputeWorkGroupID; int convolutionStep; - int dynamicBatch; int symmetricKernel; int supportAxis; int cacheShuffle; @@ -848,7 +840,6 @@ typedef struct { int forceCallbackVersionRealTransforms; int numBuffersBound[10]; - int currentBatchBindingID; int convolutionBindingID; int LUTBindingID; int BluesteinConvolutionBindingID; @@ -1011,7 +1002,6 @@ typedef struct { //int outputType; PfContainer inputsStruct; PfContainer outputsStruct; - PfContainer batchStruct; PfContainer kernelStruct; PfContainer sdataStruct; PfContainer LUTStruct; @@ -1203,8 +1193,6 @@ typedef struct { pfUINT applicationStringSize;//size of saveApplicationString in bytes void* saveApplicationString;//memory array(uint32_t* for Vulkan, char* for CUDA/HIP/OpenCL) through which user can access VkFFT generated binaries. (will be allocated by VkFFT, deallocated with deleteVkFFT call) - pfUINT debugKernelCounter; - pfUINT debugUpdateCounter; } VkFFTApplication; #endif From 74678cb15b2c43be5ffeda80a05eba648f224a08 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sat, 29 Mar 2025 19:01:37 +0100 Subject: [PATCH 15/18] test for inverse and add summary documentation --- .../vkFFT_AppManagement/vkFFT_InitializeApp.h | 7 ++++++- .../vkFFT_API_handles/vkFFT_DispatchPlan.h | 14 +++++++++----- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 8 ++++---- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 22f0eee..8d0250a 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -969,7 +969,12 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf if (inputLaunchConfiguration.indirectDispatch != 0) { app->configuration.indirectDispatch = inputLaunchConfiguration.indirectDispatch; app->configuration.indirectBuffer = inputLaunchConfiguration.indirectBuffer; - app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset; + if (indirectBufferOffset){ + app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset; + } + else{ + app->configuration.indirectBufferOffset = 0; + } app->configuration.indirectHostPointer = inputLaunchConfiguration.indirectHostPointer; } resFFT = initializeBluesteinAutoPadding(app); diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h index 091e5a1..5116cf8 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_DispatchPlan.h @@ -153,17 +153,21 @@ static inline VkFFTResult VkFFT_DispatchPlan(VkFFTApplication* app, VkFFTAxis* a if (axis->pushConstants.structSize > 0) { vkCmdPushConstants(app->configuration.commandBuffer[0], axis->pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, (uint32_t)axis->pushConstants.structSize, axis->pushConstants.data); } - bool indirect_dispatch = (app->configuration.indirectDispatch && app->configuration.indirectHostPointer != nullptr); + bool indirect_dispatch; + if (axis->specializationConstants.inverse){ + indirect_dispatch = ((app->configuration.indirectDispatch & 0x2) && (app->configuration.indirectHostPointer != nullptr)); + } + else{ + indirect_dispatch = ((app->configuration.indirectDispatch & 0x1) && (app->configuration.indirectHostPointer != nullptr)); + } + pfUINT indirect_offset; if (indirect_dispatch){ unsigned int* host_indirect = (unsigned int*)((char*)app->configuration.indirectHostPointer + app->configuration.indirectBufferOffset + app->indirectDispatchID*16); host_indirect[0] = (uint32_t)dispatchSize[0]; host_indirect[1] = (uint32_t)dispatchSize[1]; host_indirect[2] = (uint32_t)dispatchSize[2]; - host_indirect[3] = axis->batchWorkGroup; -; - //host_indirect[3] = 1000*axis->specializationConstants.performR2C + axis->specializationConstants.performR2CmultiUpload; - //host_indirect[3] = axis->specializationConstants.inverse + axis->specializationConstants.performR2CmultiUpload * 2;// + axis->specializationConstants.axis_upload_id * 10 + axis->specializationConstants.axis_id; + host_indirect[3] = axis->batchWorkGroup + 10000*axis->specializationConstants.inverse; indirect_offset = app->configuration.indirectBufferOffset + 16*app->indirectDispatchID; vkCmdDispatchIndirect(app->configuration.commandBuffer[0], app->configuration.indirectBuffer, indirect_offset); app->indirectDispatchID++; diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index eb0d468..a800403 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -150,8 +150,8 @@ typedef struct { VkBuffer* inputBuffer;//pointer to array of input buffers (or one buffer) used to read data from if isInputFormatted is enabled VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled - VkBuffer indirectBuffer; - unsigned int* indirectHostPointer; + VkBuffer indirectBuffer; //buffer that contains workgroupsizes for indirect dispatch. Size hould be at least 4 x 4 bytes x the number of dispatches. + unsigned int* indirectHostPointer; // pointer to the array with the indirect workgroup sizes on the host side. During dispatch this array will be filled by VkFFT, which can later be updated by user. format us uint[4] = {x_size, y_size, z_size, id}, // with id the axis that contains the batch number (0=x, 1=y, 2=z), plus 10000 if the dispatch concerns an inverse FFT. #elif(VKFFT_BACKEND==1) void** buffer;//pointer to device buffer used for computations void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation @@ -188,7 +188,7 @@ typedef struct { pfUINT inputBufferOffset;//specify if VkFFT has to offset the first element position inside the input buffer. In bytes. Default 0 pfUINT outputBufferOffset;//specify if VkFFT has to offset the first element position inside the output buffer. In bytes. Default 0 pfUINT kernelOffset;//specify if VkFFT has to offset the first element position inside the kernel. In bytes. Default 0 - pfUINT indirectBufferOffset; + pfUINT indirectBufferOffset; //specify if VkFFT has to offset the first element posigion inside the indirectBuffer. In bytes. Default 0 pfUINT specifyOffsetsAtLaunch;//specify if offsets will be selected with launch parameters VkFFTLaunchParams (0 - off, 1 - on). Default 0 //optional: (default 0 if not stated otherwise) @@ -324,7 +324,7 @@ typedef struct { MTL::CommandBuffer* commandBuffer;//Filled at app execution MTL::ComputeCommandEncoder* commandEncoder;//Filled at app execution #endif - pfUINT indirectDispatch; //0 for direct, 1 for fwd indirect, 2 for inv indirec, 3 for both + pfUINT indirectDispatch; //0 for direct dispatch, 1 for fwd indirect, 2 for inv indirect, 3 for both indirect } VkFFTConfiguration;//parameters specified at plan creation typedef struct { From e2bd39d96314bc1f352dd33629f6ae4034225cad Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sat, 29 Mar 2025 19:06:27 +0100 Subject: [PATCH 16/18] Update vkFFT_Structs.h --- vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h index a800403..57ebddc 100644 --- a/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h +++ b/vkFFT/vkFFT/vkFFT_Structs/vkFFT_Structs.h @@ -151,7 +151,7 @@ typedef struct { VkBuffer* outputBuffer;//pointer to array of output buffers (or one buffer) used for write data to if isOutputFormatted is enabled VkBuffer* kernel;//pointer to array of kernel buffers (or one buffer) used for read kernel data from if performConvolution is enabled VkBuffer indirectBuffer; //buffer that contains workgroupsizes for indirect dispatch. Size hould be at least 4 x 4 bytes x the number of dispatches. - unsigned int* indirectHostPointer; // pointer to the array with the indirect workgroup sizes on the host side. During dispatch this array will be filled by VkFFT, which can later be updated by user. format us uint[4] = {x_size, y_size, z_size, id}, // with id the axis that contains the batch number (0=x, 1=y, 2=z), plus 10000 if the dispatch concerns an inverse FFT. + unsigned int* indirectHostPointer; // pointer to the array with the indirect workgroup sizes on the host side. During dispatch this array will be filled by VkFFT, which can later be updated by user. format us uint[4] = {x_size, y_size, z_size, id}, with id the axis that contains the batch number (0=x, 1=y, 2=z), plus 10000 if the dispatch concerns an inverse FFT. #elif(VKFFT_BACKEND==1) void** buffer;//pointer to device buffer used for computations void** tempBuffer;//needed if reorderFourStep is enabled to transpose the array. Same size as buffer. Default 0. Setting to non zero value enables manual user allocation From 1d343f9aa8f0760335455d8d9a617e2fd469e052 Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sat, 29 Mar 2025 20:18:38 +0100 Subject: [PATCH 17/18] Update vkFFT_UpdateBuffers.h --- .../vkFFT_API_handles/vkFFT_UpdateBuffers.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h index 07f4755..f005340 100644 --- a/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h +++ b/vkFFT/vkFFT/vkFFT_PlanManagement/vkFFT_API_handles/vkFFT_UpdateBuffers.h @@ -927,8 +927,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } #if(VKFFT_BACKEND==0) if (axis->specializationConstants.performBufferSetUpdate) { - //descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); - descriptorBufferInfo.range = app->configuration.bufferSize[0]; + descriptorBufferInfo.range = (axis->specializationConstants.inputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.inputBufferBlockSize); } #endif @@ -1106,8 +1105,7 @@ static inline VkFFTResult VkFFTUpdateBufferSet(VkFFTApplication* app, VkFFTPlan* } #if(VKFFT_BACKEND==0) if (axis->specializationConstants.performBufferSetUpdate) { - //descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); - descriptorBufferInfo.range = app->configuration.bufferSize[0]; + descriptorBufferInfo.range = (axis->specializationConstants.outputBufferBlockSize); descriptorBufferInfo.offset = offset * (axis->specializationConstants.outputBufferBlockSize); } #endif From 2c8d7a2ca1f205b4f872d0ba3f001678fec3e2be Mon Sep 17 00:00:00 2001 From: Dirk van den Bekerom Date: Sat, 29 Mar 2025 20:18:42 +0100 Subject: [PATCH 18/18] Update vkFFT_InitializeApp.h --- vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h index 8d0250a..4aa432b 100644 --- a/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h +++ b/vkFFT/vkFFT/vkFFT_AppManagement/vkFFT_InitializeApp.h @@ -969,7 +969,7 @@ static inline VkFFTResult setConfigurationVkFFT(VkFFTApplication* app, VkFFTConf if (inputLaunchConfiguration.indirectDispatch != 0) { app->configuration.indirectDispatch = inputLaunchConfiguration.indirectDispatch; app->configuration.indirectBuffer = inputLaunchConfiguration.indirectBuffer; - if (indirectBufferOffset){ + if (app->configuration.indirectBufferOffset){ app->configuration.indirectBufferOffset = inputLaunchConfiguration.indirectBufferOffset; } else{