diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html new file mode 100644 index 000000000..7c89b545c --- /dev/null +++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html @@ -0,0 +1 @@ +
diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
new file mode 100644
index 000000000..360c10824
--- /dev/null
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
@@ -0,0 +1,457 @@
+import tgpu, { type TgpuComputeFn } from 'typegpu';
+import * as d from 'typegpu/data';
+
+/** Number of `u32` elements in the storage buffer every benchmark shader reads and writes. */
+const BUFFER_SIZE = 2048;
+
+/**
+ * Bind group layout shared by all benchmark entry points: a single mutable
+ * storage entry named `buffer`, typed as an array of `BUFFER_SIZE` u32 values.
+ * The shaders below reach it as `targetBuffer` through `benchmarkLayout.bound.buffer`.
+ */
+const benchmarkLayout = tgpu.bindGroupLayout({
+  buffer: { storage: d.arrayOf(d.u32, BUFFER_SIZE), access: 'mutable' },
+});
+
+// Compute functions
+//
+// Each benchmark comes in two variants that compute the same value: one that
+// goes through `tgpu.fn` helpers and one with the helpers expanded by hand
+// (they are paired up in `benchmarkPairs`). Every entry point uses a workgroup
+// size of 1 and indexes `targetBuffer` with `gid.x`.
+// NOTE(review): nothing here bounds-checks `gid.x` against BUFFER_SIZE; the
+// dispatch size is set outside this view - confirm it never exceeds it.
+
+/**
+ * "Basic Operations" benchmark, inlined variant: plain `+` arithmetic on the
+ * invocation index and the current buffer element, written back in place.
+ * Computes the same x/y/z/w chain as `basic`, without calling `add`.
+ */
+const basicInlined = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var x = 1 + in.gid.x + bufferValue;
+  var y = 2 + 3 + x;
+  var z = y + bufferValue;
+  var w = z + bufferValue;
+  targetBuffer[in.gid.x] = x + y + z + w;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+});
+
+/** u32 helper returning `a + b`; the basic building block of the call-based variants. */
+const add = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => a + b);
+
+/**
+ * "Basic Operations" benchmark, function-call variant: the same x/y/z/w chain
+ * as `basicInlined`, with every addition routed through `add`.
+ */
+const basic = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var x = add(add(in.gid.x, 1), bufferValue);
+  var y = add(add(2, 3), x);
+  var z = add(y, bufferValue);
+  var w = add(z, bufferValue);
+  targetBuffer[in.gid.x] = add(add(add(x, y), z), w);
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+  add,
+});
+
+/**
+ * "Complex Operations" benchmark, inlined variant: the a/b/c/d chain of
+ * `complex` with every `level1Fn`..`level4Fn`, `add` and `multiply` call
+ * expanded by hand into one arithmetic expression per variable.
+ * Keep it in sync with `complex` and the levelNFn helpers - the long `a`
+ * expression is `level1Fn(gid.x + bufferValue, bufferValue * 2)` written out.
+ */
+const complexInlined = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var a = ((((in.gid.x + bufferValue) + (bufferValue * 2)) * 3 + (in.gid.x + bufferValue) * 2 + 7) * ((bufferValue * 2) * 3 + (in.gid.x + bufferValue) + 7) + ((((in.gid.x + bufferValue) + (bufferValue * 2)) * 3 + (in.gid.x + bufferValue) * 2 + 7) * 3 + ((bufferValue * 2) * 3 + (in.gid.x + bufferValue) + 7) + 7)) * 3 + (((in.gid.x + bufferValue) + (bufferValue * 2)) * 3 + (in.gid.x + bufferValue) * 2 + 7) * ((bufferValue * 2) * 3 + (in.gid.x + bufferValue) + 7) * 2 + 7;
+  var b = a + ((bufferValue * in.gid.x + bufferValue * 3 + in.gid.x + 7) * 3 + bufferValue * in.gid.x * 2 + 7);
+  var c = b * ((in.gid.x + 5) * 3 + in.gid.x * 2 + 7);
+  var d = c + (a * 3 + bufferValue + 7);
+  targetBuffer[in.gid.x] = d;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+});
+
+/** u32 helper returning `a * b`. */
+const multiply = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => a * b);
+
+/** Innermost helper of the nested chain: `a * 3 + (b + 7)`, built from `multiply` and `add`. */
+const level4Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return add(multiply(a, 3), add(b, 7));
+});
+
+/** `level4Fn(a + b, a * 2)` - one level of nesting above `level4Fn`. */
+const level3Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return level4Fn(add(a, b), multiply(a, 2));
+});
+
+/** `level3Fn(a * b, level4Fn(a, b))` - calls into both lower levels. */
+const level2Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return level3Fn(multiply(a, b), level4Fn(a, b));
+});
+
+/** Outermost helper: `level2Fn(level3Fn(a, b), level4Fn(b, a))` (note the swapped arguments to `level4Fn`). */
+const level1Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return level2Fn(level3Fn(a, b), level4Fn(b, a));
+});
+
+/**
+ * "Complex Operations" benchmark, function-call variant: builds a/b/c/d from
+ * the nested `level1Fn`..`level4Fn` helpers plus `add`/`multiply`, then stores
+ * `d` back into the buffer element it read from.
+ */
+const complex = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var a = level1Fn(add(in.gid.x, bufferValue), multiply(bufferValue, 2));
+  var b = add(a, level2Fn(bufferValue, in.gid.x));
+  var c = multiply(b, level3Fn(in.gid.x, 5));
+  var d = add(c, level4Fn(a, bufferValue));
+  targetBuffer[in.gid.x] = d;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+  add,
+  multiply,
+  level1Fn,
+  level2Fn,
+  level3Fn,
+  level4Fn,
+});
+
+/** Per-iteration step of the branching benchmark: `value * 2 + index`. */
+const processElement = tgpu.fn([d.u32, d.u32], d.u32)((value, index) => {
+  return add(multiply(value, 2), index);
+});
+
+/** Returns `value * 3` when `value > threshold`, otherwise `value + 1`. */
+const conditionalProcess = tgpu.fn([d.u32, d.u32], d.u32)(
+  (value, threshold) => {
+    if (value > threshold) {
+      return multiply(value, 3);
+    }
+    return add(value, 1);
+  },
+);
+
+/**
+ * "Branching Operations" benchmark, function-call variant: a 10-iteration
+ * outer loop with an if/else on the running result, and a 5-iteration inner
+ * loop that alternates between doubling (even `j`) and adding `j` (odd `j`).
+ * All arithmetic goes through the helper functions listed in `$uses`.
+ */
+const branchingOperations = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var result = bufferValue;
+
+  for (var i = 0u; i < 10u; i++) {
+    result = processElement(result, i);
+
+    if (result > 100u) {
+      result = conditionalProcess(result, 50u);
+    } else {
+      result = add(result, multiply(i, 2));
+    }
+
+    for (var j = 0u; j < 5u; j++) {
+      if (j % 2u == 0u) {
+        result = multiply(result, 2);
+      } else {
+        result = add(result, j);
+      }
+    }
+  }
+
+  targetBuffer[in.gid.x] = result;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+  add,
+  multiply,
+  processElement,
+  conditionalProcess,
+});
+
+/**
+ * "Branching Operations" benchmark, inlined variant: same loops and branches
+ * as `branchingOperations` with the helper bodies written out.
+ * The nested `if (result > 50u)` is the expanded `conditionalProcess(result, 50u)`;
+ * it sits under `result > 100u`, so its else branch cannot be taken - it is
+ * there to mirror the call-based variant, not because it is reachable.
+ */
+const branchingOperationsInlined = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var result = bufferValue;
+
+  for (var i = 0u; i < 10u; i++) {
+    result = (result * 2) + i;
+
+    if (result > 100u) {
+      if (result > 50u) {
+        result = result * 3;
+      } else {
+        result = result + 1;
+      }
+    } else {
+      result = result + (i * 2);
+    }
+
+    for (var j = 0u; j < 5u; j++) {
+      if (j % 2u == 0u) {
+        result = result * 2;
+      } else {
+        result = result + j;
+      }
+    }
+  }
+
+  targetBuffer[in.gid.x] = result;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+});
+
+/**
+ * Benchmark registry: one entry per benchmark group, each holding a
+ * 'Function Calls' and an 'Inlined' variant with a `name` label and the
+ * compute `entrypoint` to run.
+ */
+const benchmarkPairs = {
+  'Basic Operations': {
+    'Function Calls': {
+      name: 'With Function Calls',
+      entrypoint: basic,
+    },
+    'Inlined': {
+      name: 'Inlined Operations',
+      entrypoint: basicInlined,
+    },
+  },
+  'Complex Operations': {
+    'Function Calls': {
+      name: 'With Function Calls',
+      entrypoint: complex,
+    },
+    'Inlined': {
+      name: 'Inlined Operations',
+      entrypoint: complexInlined,
+    },
+  },
+  'Branching Operations': {
+    'Function Calls': {
+      name: 'With Function Calls',
+      entrypoint: branchingOperations,
+    },
+    'Inlined': {
+      name: 'Inlined Operations',
+      entrypoint: branchingOperationsInlined,
+    },
+  },
+};
+
+/**
+ * Builds everything needed to run one benchmark entry point.
+ *
+ * Initializes a new TypeGPU root whose device is requested with the
+ * 'timestamp-query' feature, allocates the storage buffer, binds it through
+ * `benchmarkLayout`, and creates the compute pipeline and a timestamp query set.
+ *
+ * @param entrypoint Compute function to build the pipeline from.
+ * @param initialData Optional values written to the storage buffer before use;
+ *   when omitted the buffer is left as created.
+ *   NOTE(review): presumably expected to hold BUFFER_SIZE elements - the
+ *   length is not checked here.
+ * @returns The root, the pipeline (already bound to the bind group) and the
+ *   query set. The buffer and bind group are not returned.
+ */
+async function createBenchmarkSetup(
+  entrypoint: TgpuComputeFn,
+  initialData?: number[],
+) {
+  // A fresh root is created on every call.
+  // NOTE(review): 'timestamp-query' is an optional WebGPU feature; how init behaves
+  // when it is unavailable is not visible here - confirm callers handle that.
+  const root = await tgpu.init({
+    device: {
+      requiredFeatures: ['timestamp-query'],
+    },
+  });
+
+  const targetBuffer = root.createBuffer(d.arrayOf(d.u32, BUFFER_SIZE)).$usage(
+    'storage',
+  );
+  if (initialData) {
+    targetBuffer.write(initialData);
+  }
+
+  const bindGroup = root.createBindGroup(benchmarkLayout, {
+    buffer: targetBuffer,
+  });
+  const pipeline = root['~unstable'].withCompute(entrypoint).createPipeline()
+    .with(benchmarkLayout, bindGroup);
+
+  // Two timestamp slots - presumably start/end of the measured pass; confirm
+  // against the measurement code that consumes the query set.
+  const querySet = root.createQuerySet('timestamp', 2);
+
+  return { root, pipeline, querySet };
+}
+
+async function runSingleMeasurement(
+  setup: Awaited