@@ -1002,6 +1002,7 @@ inline void wait(Context &ctx, std::future<void> &future) {
10021002inline void toCPU (Context &ctx, Tensor &tensor, void *data, size_t bufferSize,
10031003 CopyData &op) {
10041004 wgpuQueueSubmit (ctx.queue , 1 , &op.commandBuffer );
1005+ wgpuCommandBufferRelease (op.commandBuffer );
10051006 CallbackData callbackData = {op.readbackBuffer , bufferSize, data, &op.promise ,
10061007 &op.future };
10071008 wgpuQueueOnSubmittedWorkDone (
@@ -1062,9 +1063,14 @@ inline void toCPU(Context &ctx, Tensor &tensor, void *data, size_t bufferSize) {
10621063 wgpuCommandEncoderCopyBufferToBuffer (commandEncoder, tensor.data .buffer , 0 ,
10631064 op.readbackBuffer , 0 , bufferSize);
10641065 op.commandBuffer = wgpuCommandEncoderFinish (commandEncoder, nullptr );
1066+ wgpuComputePassEncoderRelease (computePassEncoder);
1067+ wgpuCommandEncoderRelease (commandEncoder);
10651068 check (op.commandBuffer , " Create command buffer" , __FILE__, __LINE__);
10661069 }
10671070 toCPU (ctx, tensor, data, bufferSize, op);
1071+ if (op.readbackBuffer ) {
1072+ wgpuBufferRelease (op.readbackBuffer );
1073+ }
10681074}
10691075
10701076/* *
@@ -1103,9 +1109,12 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data,
11031109 wgpuCommandEncoderCopyBufferToBuffer (commandEncoder, buffer, 0 ,
11041110 op.readbackBuffer , 0 , bufferSize);
11051111 op.commandBuffer = wgpuCommandEncoderFinish (commandEncoder, nullptr );
1112+ wgpuComputePassEncoderRelease (computePassEncoder);
1113+ wgpuCommandEncoderRelease (commandEncoder);
11061114 check (op.commandBuffer , " Create command buffer" , __FILE__, __LINE__);
11071115 }
11081116 wgpuQueueSubmit (ctx.queue , 1 , &op.commandBuffer );
1117+ wgpuCommandBufferRelease (op.commandBuffer );
11091118 CallbackData callbackData = {op.readbackBuffer , bufferSize, data, &op.promise ,
11101119 &op.future };
11111120 wgpuQueueOnSubmittedWorkDone (
@@ -1131,6 +1140,9 @@ inline void toCPU(Context &ctx, WGPUBuffer buffer, void *data,
11311140 },
11321141 &callbackData);
11331142 wait (ctx, op.future );
1143+ if (op.readbackBuffer ) {
1144+ wgpuBufferRelease (op.readbackBuffer );
1145+ }
11341146}
11351147
11361148
@@ -1217,6 +1229,9 @@ inline void resetCommandBuffer(WGPUDevice &device, Kernel &op) {
12171229 op->totalWorkgroups [2 ]);
12181230 wgpuComputePassEncoderEnd (computePassEncoder);
12191231 op->commandBuffer = wgpuCommandEncoderFinish (commandEncoder, nullptr );
1232+ wgpuComputePassEncoderRelease (computePassEncoder);
1233+ wgpuCommandEncoderRelease (commandEncoder);
1234+
12201235 op->used = false ;
12211236 }
12221237}
@@ -1507,6 +1522,7 @@ inline void dispatchKernel(Context &ctx, Kernel &kernel,
15071522 resetCommandBuffer (ctx.device , kernel);
15081523 }
15091524 wgpuQueueSubmit (ctx.queue , 1 , &kernel->commandBuffer );
1525+ wgpuCommandBufferRelease (kernel->commandBuffer );
15101526 kernel->used = true ;
15111527 wgpuQueueOnSubmittedWorkDone (
15121528 ctx.queue ,
0 commit comments