From f2ff8c03709194f89eda27975043db2b08f50b0a Mon Sep 17 00:00:00 2001
From: Konrad Reczko <reczkok@gmail.com>
Date: Thu, 11 Sep 2025 15:30:56 +0200
Subject: [PATCH 1/4] add some benchmarks

---
 .../tests/shader-performance/index.html       |   1 +
 .../tests/shader-performance/index.ts         | 237 ++++++++++++++++++
 .../tests/shader-performance/meta.json        |   5 +
 3 files changed, 243 insertions(+)
 create mode 100644 apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html
 create mode 100644 apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
 create mode 100644 apps/typegpu-docs/src/content/examples/tests/shader-performance/meta.json
diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html
new file mode 100644
index 000000000..7c89b545c
--- /dev/null
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.html
@@ -0,0 +1 @@
+<div></div>
diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
new file mode 100644
index 000000000..4d165a2ad
--- /dev/null
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
@@ -0,0 +1,237 @@
+import tgpu, { type TgpuComputeFn } from 'typegpu';
+import * as d from 'typegpu/data';
+
+const BUFFER_SIZE = 2048;
+
+const benchmarkLayout = tgpu.bindGroupLayout({
+  buffer: { storage: d.arrayOf(d.u32, BUFFER_SIZE), access: 'mutable' },
+});
+
+// Compute functions
+const basicCompute = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var x = 1 + in.gid.x + bufferValue;
+  var y = 2 + 3 + x;
+  var z = y + bufferValue;
+  var w = z + bufferValue;
+  targetBuffer[in.gid.x] = x + y + z + w;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+});
+
+const add = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => a + b);
+
+const functionCallCompute = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var x = add(add(in.gid.x, 1), bufferValue);
+  var y = add(add(2, 3), x);
+  var z = add(y, bufferValue);
+  var w = add(z, bufferValue);
+  targetBuffer[in.gid.x] = add(add(add(x, y), z), w);
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+  add,
+});
+
+// Inlined complex math operations
+const inlinedMathCompute = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var a = (in.gid.x * 3 + 7) * bufferValue + 15;
+  var b = ((a * 2 + 9) * (bufferValue + 4)) - (a * 3);
+  var c = (b * b + a * a) / (a + 1);
+  var d = ((c + b) * (a - b + 1)) + ((c * 2) * (a + 5));
+  var e = (d * d + c * c + b * b + a * a) / (bufferValue + 1);
+  var f = e * (d + c + b + a) + (e * e) / (d + 1);
+  targetBuffer[in.gid.x] = f;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+});
+
+// Nested function calls (4 levels deep)
+const multiply = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => a * b);
+
+const level4Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return add(multiply(a, 3), add(b, 7));
+});
+
+const level3Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return level4Fn(add(a, b), multiply(a, 2));
+});
+
+const level2Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return level3Fn(multiply(a, b), level4Fn(a, b));
+});
+
+const level1Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
+  return level2Fn(level3Fn(a, b), level4Fn(b, a));
+});
+
+const nestedFunctionCompute = tgpu['~unstable'].computeFn({
+  workgroupSize: [1],
+  in: {
+    gid: d.builtin.globalInvocationId,
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var result = level1Fn(add(in.gid.x, bufferValue), multiply(bufferValue, 2));
+  result = add(result, level2Fn(bufferValue, in.gid.x));
+  result = multiply(result, level3Fn(in.gid.x, 5));
+  result = add(result, level4Fn(result, bufferValue));
+  targetBuffer[in.gid.x] = result;
+}`.$uses({
+  targetBuffer: benchmarkLayout.bound.buffer,
+  add,
+  multiply,
+  level1Fn,
+  level2Fn,
+  level3Fn,
+  level4Fn,
+});
+
+// Pipeline configurations
+const pipelines = {
+  basic: {
+    name: 'Basic Arithmetic',
+    entrypoint: basicCompute,
+  },
+  functionCall: {
+    name: 'Function Call',
+    entrypoint: functionCallCompute,
+  },
+  nestedFunction: {
+    name: 'Nested Function Calls',
+    entrypoint: nestedFunctionCompute,
+  },
+  inlinedMath: {
+    name: 'Inlined Complex Math',
+    entrypoint: inlinedMathCompute,
+  },
+} as const;
+
+async function runBenchmark(
+  entrypoint: TgpuComputeFn,
+  name: string,
+  options?: {
+    iterations?: number;
+    times?: number;
+    warmup?: boolean;
+    timeLimitMs?: number;
+  },
+) {
+  const root = await tgpu.init({
+    device: {
+      requiredFeatures: ['timestamp-query'],
+    },
+  });
+  const targetBuffer = root.createBuffer(d.arrayOf(d.u32, BUFFER_SIZE)).$usage(
+    'storage',
+  );
+  const bindGroup = root.createBindGroup(benchmarkLayout, {
+    buffer: targetBuffer,
+  });
+  const pipeline = root['~unstable'].withCompute(entrypoint).createPipeline()
+    .with(benchmarkLayout, bindGroup);
+
+  const {
+    iterations = 1_000,
+    times = 100_000,
+    warmup = true,
+    timeLimitMs = 10_000,
+  } = options || {};
+
+  console.groupCollapsed(`Pipeline Details: ${name}`);
+  console.log(tgpu.resolve({ externals: { pipeline } }));
+  console.groupEnd();
+
+  if (warmup) {
+    for (let i = 0; i < Math.floor(times / 2); i++) {
+      pipeline.dispatchWorkgroups(BUFFER_SIZE);
+    }
+  }
+
+  const querySet = root.createQuerySet('timestamp', 2);
+  let runningAverage = 0;
+  const startTime = Date.now();
+
+  let actualTimes = 0;
+  for (let i = 0; i < times && (Date.now() - startTime) < timeLimitMs; i++) {
+    pipeline.withTimestampWrites({
+      querySet,
+      beginningOfPassWriteIndex: 0,
+    }).dispatchWorkgroups(BUFFER_SIZE);
+
+    for (let j = 1; j < iterations - 1; j++) {
+      pipeline.dispatchWorkgroups(BUFFER_SIZE);
+    }
+
+    pipeline.withTimestampWrites({
+      querySet,
+      endOfPassWriteIndex: 1,
+    }).dispatchWorkgroups(BUFFER_SIZE);
+
+    root['~unstable'].flush();
+
+    querySet.resolve();
+    const [start, end] = await querySet.read();
+    const currentTime = Number(end - start);
+
+    actualTimes++;
+    runningAverage = runningAverage +
+      (currentTime - runningAverage) / actualTimes;
+  }
+
+  const avgTimeMs = runningAverage / 1_000_000;
+
+  root.destroy();
+
+  return {
+    averageTimeNs: runningAverage,
+    averageTimeMs: avgTimeMs,
+    runs: actualTimes,
+  };
+}
+
+async function runAllBenchmarks(options?: Parameters<typeof runBenchmark>[2]) {
+  console.log(`Running ${Object.keys(pipelines).length} benchmarks...`);
+
+  const results: Record<string, Awaited<ReturnType<typeof runBenchmark>>> = {};
+
+  for (const [key, { name, entrypoint }] of Object.entries(pipelines)) {
+    results[key] = await runBenchmark(entrypoint, name, options);
+  }
+
+  return results;
+}
+
+// Run benchmarks
+const results = await runAllBenchmarks();
+
+console.log('\nBenchmark Results:');
+console.table(Object.fromEntries(
+  Object.entries(results).map(([key, result]) => [
+    pipelines[key as keyof typeof pipelines].name,
+    {
+      'Avg Time (ms)': result.averageTimeMs.toFixed(3),
+      'Runs': result.runs,
+      'Avg Time (ns)': result.averageTimeNs.toFixed(0),
+    },
+  ]),
+));
+
+export function onCleanup() {
+}
diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/meta.json b/apps/typegpu-docs/src/content/examples/tests/shader-performance/meta.json
new file mode 100644
index 000000000..cc6fdb5bd
--- /dev/null
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/meta.json
@@ -0,0 +1,5 @@
+{
+  "title": "Shader performance",
+  "category": "tests",
+  "tags": ["experimental"]
+}

From a525bad84690e3ace24a7cf041890177924deb73 Mon Sep 17 00:00:00 2001
From: Konrad Reczko <reczkok@gmail.com>
Date: Thu, 11 Sep 2025 15:58:18 +0200
Subject: [PATCH 2/4] make the inlined benchmark mirror the nested function calls

---
 .../tests/shader-performance/index.ts         | 25 ++++++++-----------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
index 4d165a2ad..4b47d799b 100644
--- a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
@@ -43,7 +43,6 @@ const functionCallCompute = tgpu['~unstable'].computeFn({
   add,
 });
 
-// Inlined complex math operations
 const inlinedMathCompute = tgpu['~unstable'].computeFn({
   workgroupSize: [1],
   in: {
@@ -51,13 +50,11 @@ const inlinedMathCompute = tgpu['~unstable'].computeFn({
   },
 })`{
   let bufferValue = targetBuffer[in.gid.x];
-  var a = (in.gid.x * 3 + 7) * bufferValue + 15;
-  var b = ((a * 2 + 9) * (bufferValue + 4)) - (a * 3);
-  var c = (b * b + a * a) / (a + 1);
-  var d = ((c + b) * (a - b + 1)) + ((c * 2) * (a + 5));
-  var e = (d * d + c * c + b * b + a * a) / (bufferValue + 1);
-  var f = e * (d + c + b + a) + (e * e) / (d + 1);
-  targetBuffer[in.gid.x] = f;
+  var a = ((((in.gid.x + bufferValue) + (bufferValue * 2)) * 3 + (in.gid.x + bufferValue) * 2 + 7) * ((bufferValue * 2) * 3 + (in.gid.x + bufferValue) + 7) + ((((in.gid.x + bufferValue) + (bufferValue * 2)) * 3 + (in.gid.x + bufferValue) * 2 + 7) * 3 + ((bufferValue * 2) * 3 + (in.gid.x + bufferValue) + 7) + 7)) * 3 + (((in.gid.x + bufferValue) + (bufferValue * 2)) * 3 + (in.gid.x + bufferValue) * 2 + 7) * ((bufferValue * 2) * 3 + (in.gid.x + bufferValue) + 7) * 2 + 7;
+  var b = a + ((bufferValue * in.gid.x + bufferValue * 3 + in.gid.x + 7) * 3 + bufferValue * in.gid.x * 2 + 7);
+  var c = b * ((in.gid.x + 5) * 3 + in.gid.x * 2 + 7);
+  var d = c + (a * 3 + bufferValue + 7);
+  targetBuffer[in.gid.x] = d;
 }`.$uses({
   targetBuffer: benchmarkLayout.bound.buffer,
 });
@@ -88,11 +85,11 @@ const nestedFunctionCompute = tgpu['~unstable'].computeFn({
   },
 })`{
   let bufferValue = targetBuffer[in.gid.x];
-  var result = level1Fn(add(in.gid.x, bufferValue), multiply(bufferValue, 2));
-  result = add(result, level2Fn(bufferValue, in.gid.x));
-  result = multiply(result, level3Fn(in.gid.x, 5));
-  result = add(result, level4Fn(result, bufferValue));
-  targetBuffer[in.gid.x] = result;
+  var a = level1Fn(add(in.gid.x, bufferValue), multiply(bufferValue, 2));
+  var b = add(a, level2Fn(bufferValue, in.gid.x));
+  var c = multiply(b, level3Fn(in.gid.x, 5));
+  var d = add(c, level4Fn(a, bufferValue));
+  targetBuffer[in.gid.x] = d;
 }`.$uses({
   targetBuffer: benchmarkLayout.bound.buffer,
   add,
@@ -118,7 +115,7 @@ const pipelines = {
     entrypoint: nestedFunctionCompute,
   },
   inlinedMath: {
-    name: 'Inlined Complex Math',
+    name: 'Inlined Nested Function Calls',
     entrypoint: inlinedMathCompute,
   },
 } as const;

From 5000b587f7fbbb6b80c51d54ae8983ad4e5e37fa Mon Sep 17 00:00:00 2001
From: Konrad Reczko <reczkok@gmail.com>
Date: Thu, 11 Sep 2025 16:18:05 +0200
Subject: [PATCH 3/4] interleave paired benchmark runs and report min/max/std dev

---
 .../tests/shader-performance/index.ts         | 300 ++++++++++++------
 1 file changed, 208 insertions(+), 92 deletions(-)

diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
index 4b47d799b..fe7e63387 100644
--- a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
@@ -8,7 +8,7 @@ const benchmarkLayout = tgpu.bindGroupLayout({
 });
 
 // Compute functions
-const basicCompute = tgpu['~unstable'].computeFn({
+const basicInlined = tgpu['~unstable'].computeFn({
   workgroupSize: [1],
   in: {
     gid: d.builtin.globalInvocationId,
@@ -26,7 +26,7 @@ const basicCompute = tgpu['~unstable'].computeFn({
 
 const add = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => a + b);
 
-const functionCallCompute = tgpu['~unstable'].computeFn({
+const basic = tgpu['~unstable'].computeFn({
   workgroupSize: [1],
   in: {
     gid: d.builtin.globalInvocationId,
@@ -43,7 +43,7 @@ const functionCallCompute = tgpu['~unstable'].computeFn({
   add,
 });
 
-const inlinedMathCompute = tgpu['~unstable'].computeFn({
+const complexInlined = tgpu['~unstable'].computeFn({
   workgroupSize: [1],
   in: {
     gid: d.builtin.globalInvocationId,
@@ -59,7 +59,6 @@ const inlinedMathCompute = tgpu['~unstable'].computeFn({
   targetBuffer: benchmarkLayout.bound.buffer,
 });
 
-// Nested function calls (4 levels deep)
 const multiply = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => a * b);
 
 const level4Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
@@ -78,7 +77,7 @@ const level1Fn = tgpu.fn([d.u32, d.u32], d.u32)((a, b) => {
   return level2Fn(level3Fn(a, b), level4Fn(b, a));
 });
 
-const nestedFunctionCompute = tgpu['~unstable'].computeFn({
+const complex = tgpu['~unstable'].computeFn({
   workgroupSize: [1],
   in: {
     gid: d.builtin.globalInvocationId,
@@ -100,41 +99,36 @@ const nestedFunctionCompute = tgpu['~unstable'].computeFn({
   level4Fn,
 });
 
-// Pipeline configurations
-const pipelines = {
-  basic: {
-    name: 'Basic Arithmetic',
-    entrypoint: basicCompute,
-  },
-  functionCall: {
-    name: 'Function Call',
-    entrypoint: functionCallCompute,
-  },
-  nestedFunction: {
-    name: 'Nested Function Calls',
-    entrypoint: nestedFunctionCompute,
-  },
-  inlinedMath: {
-    name: 'Inlined Nested Function Calls',
-    entrypoint: inlinedMathCompute,
+const benchmarkPairs = {
+  'Basic Operations': {
+    'Function Calls': {
+      name: 'With Function Calls',
+      entrypoint: basic,
+    },
+    'Inlined': {
+      name: 'Inlined Operations',
+      entrypoint: basicInlined,
+    },
   },
-} as const;
-
-async function runBenchmark(
-  entrypoint: TgpuComputeFn,
-  name: string,
-  options?: {
-    iterations?: number;
-    times?: number;
-    warmup?: boolean;
-    timeLimitMs?: number;
+  'Complex Operations': {
+    'Function Calls': {
+      name: 'With Function Calls',
+      entrypoint: complex,
+    },
+    'Inlined': {
+      name: 'Inlined Operations',
+      entrypoint: complexInlined,
+    },
   },
-) {
+};
+
+async function createBenchmarkSetup(entrypoint: TgpuComputeFn) {
   const root = await tgpu.init({
     device: {
       requiredFeatures: ['timestamp-query'],
     },
   });
+
   const targetBuffer = root.createBuffer(d.arrayOf(d.u32, BUFFER_SIZE)).$usage(
     'storage',
   );
@@ -144,6 +138,52 @@ async function runBenchmark(
   const pipeline = root['~unstable'].withCompute(entrypoint).createPipeline()
     .with(benchmarkLayout, bindGroup);
 
+  const querySet = root.createQuerySet('timestamp', 2);
+
+  return { root, pipeline, querySet };
+}
+
+/** Times a batch of dispatches (`iterations` in total, never fewer than two). */
+async function runSingleMeasurement(
+  setup: Awaited<ReturnType<typeof createBenchmarkSetup>>,
+  iterations = 1_000,
+) {
+  const { root, pipeline: compute, querySet } = setup;
+  const workgroups = BUFFER_SIZE;
+
+  // Opening dispatch: its pass writes the start timestamp into slot 0.
+  compute
+    .withTimestampWrites({ querySet, beginningOfPassWriteIndex: 0 })
+    .dispatchWorkgroups(workgroups);
+  // Middle dispatches carry no timestamp writes.
+  for (let remaining = iterations - 2; remaining > 0; remaining -= 1) {
+    compute.dispatchWorkgroups(workgroups);
+  }
+  // Closing dispatch: its pass writes the end timestamp into slot 1.
+  compute
+    .withTimestampWrites({ querySet, endOfPassWriteIndex: 1 })
+    .dispatchWorkgroups(workgroups);
+
+  root['~unstable'].flush();
+  querySet.resolve();
+  const [beginStamp, endStamp] = await querySet.read();
+  return Number(endStamp - beginStamp);
+}
+
+type BenchmarkOptions = { // every field is optional; the runner supplies defaults
+  iterations?: number; // dispatches issued per timed measurement
+  times?: number; // upper bound on measurement rounds; warm-up runs times / 10
+  warmup?: boolean; // run warm-up measurements (results discarded) first
+  timeLimitMs?: number; // wall-clock budget for the measurement loop (warm-up excluded)
+};
+
+type PipelineConfig = { name: string; entrypoint: TgpuComputeFn }; // display label + compute entry point to benchmark
+
+async function runInterleavedBenchmarkPair(
+  pairName: string,
+  pipelineConfigs: Record<string, PipelineConfig>,
+  options?: BenchmarkOptions,
+) {
   const {
     iterations = 1_000,
     times = 100_000,
@@ -151,84 +191,160 @@ async function runBenchmark(
     timeLimitMs = 10_000,
   } = options || {};
 
-  console.groupCollapsed(`Pipeline Details: ${name}`);
-  console.log(tgpu.resolve({ externals: { pipeline } }));
-  console.groupEnd();
-
-  if (warmup) {
-    for (let i = 0; i < Math.floor(times / 2); i++) {
-      pipeline.dispatchWorkgroups(BUFFER_SIZE);
+  console.log(`\nSetting up benchmark pair: ${pairName}`);
+
+  const setups = {} as Record<
+    string,
+    Awaited<ReturnType<typeof createBenchmarkSetup>>
+  >;
+  const results = {} as Record<
+    string,
+    { measurements: number[]; runningAverage: number; runs: number }
+  >;
+
+  for (const [key, config] of Object.entries(pipelineConfigs)) {
+    console.groupCollapsed(`Pipeline Details: ${config.name}`);
+    console.log(tgpu.resolve({ externals: { pipeline: config.entrypoint } }));
+    console.groupEnd();
+
+    setups[key] = await createBenchmarkSetup(config.entrypoint);
+    results[key] = { measurements: [], runningAverage: 0, runs: 0 };
+
+    if (warmup) {
+      for (let i = 0; i < Math.floor(times / 10); i++) {
+        await runSingleMeasurement(setups[key], iterations);
+      }
     }
   }
 
-  const querySet = root.createQuerySet('timestamp', 2);
-  let runningAverage = 0;
-  const startTime = Date.now();
-
-  let actualTimes = 0;
-  for (let i = 0; i < times && (Date.now() - startTime) < timeLimitMs; i++) {
-    pipeline.withTimestampWrites({
-      querySet,
-      beginningOfPassWriteIndex: 0,
-    }).dispatchWorkgroups(BUFFER_SIZE);
-
-    for (let j = 1; j < iterations - 1; j++) {
-      pipeline.dispatchWorkgroups(BUFFER_SIZE);
-    }
+  const startTime = performance.now();
+  const pipelineKeys = Object.keys(pipelineConfigs);
 
-    pipeline.withTimestampWrites({
-      querySet,
-      endOfPassWriteIndex: 1,
-    }).dispatchWorkgroups(BUFFER_SIZE);
+  let totalRuns = 0;
+  for (
+    let i = 0;
+    i < times && (performance.now() - startTime) < timeLimitMs;
+    i++
+  ) {
+    for (const key of pipelineKeys) {
+      if ((performance.now() - startTime) >= timeLimitMs) break;
 
-    root['~unstable'].flush();
+      const measurement = await runSingleMeasurement(setups[key], iterations);
+      results[key].measurements.push(measurement);
+      results[key].runs++;
 
-    querySet.resolve();
-    const [start, end] = await querySet.read();
-    const currentTime = Number(end - start);
+      results[key].runningAverage = results[key].runningAverage +
+        (measurement - results[key].runningAverage) / results[key].runs;
 
-    actualTimes++;
-    runningAverage = runningAverage +
-      (currentTime - runningAverage) / actualTimes;
+      totalRuns++;
+    }
   }
 
-  const avgTimeMs = runningAverage / 1_000_000;
+  for (const setup of Object.values(setups)) {
+    setup.root.destroy();
+  }
 
-  root.destroy();
+  const finalResults = {} as Record<string, {
+    name: string;
+    averageTimeNs: number;
+    averageTimeMs: number;
+    runs: number;
+    minTimeMs: number;
+    maxTimeMs: number;
+    stdDevMs: number;
+  }>;
+
+  for (const [key, result] of Object.entries(results)) {
+    const avgNs = result.runningAverage;
+    const avgMs = avgNs / 1_000_000;
+    const measurementsMs = result.measurements.map((m) => m / 1_000_000);
+    const minMs = Math.min(...measurementsMs);
+    const maxMs = Math.max(...measurementsMs);
+    const variance = measurementsMs.reduce((acc, val) =>
+      acc + (val - avgMs) ** 2, 0) / measurementsMs.length;
+    const stdDevMs = Math.sqrt(variance);
+
+    finalResults[key] = {
+      name: pipelineConfigs[key].name,
+      averageTimeNs: avgNs,
+      averageTimeMs: avgMs,
+      runs: result.runs,
+      minTimeMs: minMs,
+      maxTimeMs: maxMs,
+      stdDevMs: stdDevMs,
+    };
+  }
 
-  return {
-    averageTimeNs: runningAverage,
-    averageTimeMs: avgTimeMs,
-    runs: actualTimes,
-  };
+  return { pairName, results: finalResults, totalRuns };
 }
 
-async function runAllBenchmarks(options?: Parameters<typeof runBenchmark>[2]) {
-  console.log(`Running ${Object.keys(pipelines).length} benchmarks...`);
+async function runAllBenchmarkPairs(options?: BenchmarkOptions) {
+  console.log(
+    `Running ${Object.keys(benchmarkPairs).length} benchmark pairs...`,
+  );
 
-  const results: Record<string, Awaited<ReturnType<typeof runBenchmark>>> = {};
+  const allResults = [];
 
-  for (const [key, { name, entrypoint }] of Object.entries(pipelines)) {
-    results[key] = await runBenchmark(entrypoint, name, options);
+  for (const [pairName, pipelineConfigs] of Object.entries(benchmarkPairs)) {
+    const pairResult = await runInterleavedBenchmarkPair(
+      pairName,
+      pipelineConfigs,
+      options,
+    );
+    allResults.push(pairResult);
   }
 
-  return results;
+  return allResults;
 }
 
-// Run benchmarks
-const results = await runAllBenchmarks();
-
-console.log('\nBenchmark Results:');
-console.table(Object.fromEntries(
-  Object.entries(results).map(([key, result]) => [
-    pipelines[key as keyof typeof pipelines].name,
-    {
-      'Avg Time (ms)': result.averageTimeMs.toFixed(3),
-      'Runs': result.runs,
-      'Avg Time (ns)': result.averageTimeNs.toFixed(0),
-    },
-  ]),
-));
+/**
+ * Prints one table per benchmark pair plus, for two-variant pairs, a verdict
+ * line. The percentage is `(slower / faster - 1) * 100`, so it is the same
+ * whichever order the two variants are listed in.
+ */
+function displayResults(
+  allResults: Awaited<ReturnType<typeof runAllBenchmarkPairs>>,
+) {
+  console.log('\n=== Benchmark Results ===');
+
+  for (const { pairName, results, totalRuns } of allResults) {
+    console.log(`\n${pairName} (${totalRuns} total measurements)`);
+    console.log('-'.repeat(60));
+    console.table(Object.fromEntries(
+      Object.values(results).map((result) => [
+        result.name,
+        {
+          'Avg Time (ms)': result.averageTimeMs.toFixed(3),
+          'Min (ms)': result.minTimeMs.toFixed(3),
+          'Max (ms)': result.maxTimeMs.toFixed(3),
+          'Std Dev (ms)': result.stdDevMs.toFixed(3),
+          'Runs': result.runs,
+        },
+      ]),
+    ));
+
+    const variants = Object.values(results);
+    if (variants.length !== 2) continue;
+    // Sort a copy so `faster` is always the variant with the lower average.
+    const [faster, slower] = [...variants].sort((a, b) =>
+      a.averageTimeMs - b.averageTimeMs
+    );
+    if (!(faster.averageTimeMs > 0)) continue; // no usable data to compare
+    const speedup = (slower.averageTimeMs / faster.averageTimeMs - 1) * 100;
+    console.log(
+      `${faster.name} is ${speedup.toFixed(1)}% faster than ${slower.name}`,
+    );
+  }
+}
+
+const allResults = await runAllBenchmarkPairs({ // top-level await: starts when the module is evaluated
+  iterations: 1000, // dispatches per timed measurement
+  times: 1000, // max measurement rounds per pair; warm-up is times / 10 = 100 per pipeline
+  warmup: true,
+  timeLimitMs: 10_000, // no new measurement is started after 10 s per pair
+});
+
+displayResults(allResults);
 
 export function onCleanup() {
 }

From 409987d4da2e1f952ef11319a343008bf2642572 Mon Sep 17 00:00:00 2001
From: Konrad Reczko <reczkok@gmail.com>
Date: Thu, 11 Sep 2025 16:28:46 +0200
Subject: [PATCH 4/4] add branching benchmark pair and random initial buffer data

---
 .../tests/shader-performance/index.ts         | 111 +++++++++++++++++-
 1 file changed, 109 insertions(+), 2 deletions(-)

diff --git a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
index fe7e63387..360c10824 100644
--- a/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
+++ b/apps/typegpu-docs/src/content/examples/tests/shader-performance/index.ts
@@ -99,6 +99,91 @@ const complex = tgpu['~unstable'].computeFn({
   level4Fn,
 });
 
+const processElement = tgpu.fn([d.u32, d.u32], d.u32)((value, index) => {
+  return add(multiply(value, 2), index); // value * 2 + index, via add/multiply
+});
+
+const conditionalProcess = tgpu.fn([d.u32, d.u32], d.u32)(
+  (value, threshold) => {
+    if (value > threshold) { // strictly above the threshold: triple the value
+      return multiply(value, 3);
+    }
+    return add(value, 1); // otherwise: increment by one
+  },
+);
+
+const branchingOperations = tgpu['~unstable'].computeFn({ // helper-call variant of the branching kernel
+  workgroupSize: [1], // a single invocation per workgroup
+  in: {
+    gid: d.builtin.globalInvocationId, // gid.x selects the buffer element this invocation updates
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var result = bufferValue;
+
+  for (var i = 0u; i < 10u; i++) {
+    result = processElement(result, i);
+
+    if (result > 100u) {
+      result = conditionalProcess(result, 50u);
+    } else {
+      result = add(result, multiply(i, 2));
+    }
+
+    for (var j = 0u; j < 5u; j++) {
+      if (j % 2u == 0u) {
+        result = multiply(result, 2);
+      } else {
+        result = add(result, j);
+      }
+    }
+  }
+
+  targetBuffer[in.gid.x] = result;
+}`.$uses({ // externals the WGSL body above refers to by name
+  targetBuffer: benchmarkLayout.bound.buffer,
+  add,
+  multiply,
+  processElement,
+  conditionalProcess,
+});
+
+const branchingOperationsInlined = tgpu['~unstable'].computeFn({ // branchingOperations with every helper call expanded by hand
+  workgroupSize: [1], // a single invocation per workgroup
+  in: {
+    gid: d.builtin.globalInvocationId, // gid.x selects the buffer element this invocation updates
+  },
+})`{
+  let bufferValue = targetBuffer[in.gid.x];
+  var result = bufferValue;
+
+  for (var i = 0u; i < 10u; i++) {
+    result = (result * 2) + i;
+
+    if (result > 100u) {
+      if (result > 50u) {
+        result = result * 3;
+      } else {
+        result = result + 1;
+      }
+    } else {
+      result = result + (i * 2);
+    }
+
+    for (var j = 0u; j < 5u; j++) {
+      if (j % 2u == 0u) {
+        result = result * 2;
+      } else {
+        result = result + j;
+      }
+    }
+  }
+
+  targetBuffer[in.gid.x] = result;
+}`.$uses({ // only the buffer binding is needed: no helper functions remain
+  targetBuffer: benchmarkLayout.bound.buffer,
+});
+
 const benchmarkPairs = {
   'Basic Operations': {
     'Function Calls': {
@@ -120,9 +205,22 @@ const benchmarkPairs = {
       entrypoint: complexInlined,
     },
   },
+  'Branching Operations': {
+    'Function Calls': {
+      name: 'With Function Calls',
+      entrypoint: branchingOperations,
+    },
+    'Inlined': {
+      name: 'Inlined Operations',
+      entrypoint: branchingOperationsInlined,
+    },
+  },
 };
 
-async function createBenchmarkSetup(entrypoint: TgpuComputeFn) {
+async function createBenchmarkSetup(
+  entrypoint: TgpuComputeFn,
+  initialData?: number[],
+) {
   const root = await tgpu.init({
     device: {
       requiredFeatures: ['timestamp-query'],
@@ -132,6 +230,10 @@ async function createBenchmarkSetup(entrypoint: TgpuComputeFn) {
   const targetBuffer = root.createBuffer(d.arrayOf(d.u32, BUFFER_SIZE)).$usage(
     'storage',
   );
+  if (initialData) {
+    targetBuffer.write(initialData);
+  }
+
   const bindGroup = root.createBindGroup(benchmarkLayout, {
     buffer: targetBuffer,
   });
@@ -193,6 +295,11 @@ async function runInterleavedBenchmarkPair(
 
   console.log(`\nSetting up benchmark pair: ${pairName}`);
 
+  const initialData = Array.from(
+    { length: BUFFER_SIZE },
+    () => Math.floor(Math.random() * 101),
+  );
+
   const setups = {} as Record<
     string,
     Awaited<ReturnType<typeof createBenchmarkSetup>>
@@ -207,7 +314,7 @@ async function runInterleavedBenchmarkPair(
     console.log(tgpu.resolve({ externals: { pipeline: config.entrypoint } }));
     console.groupEnd();
 
-    setups[key] = await createBenchmarkSetup(config.entrypoint);
+    setups[key] = await createBenchmarkSetup(config.entrypoint, initialData);
     results[key] = { measurements: [], runningAverage: 0, runs: 0 };
 
     if (warmup) {