diff --git a/README.md b/README.md index 0c1d111..adb8ee8 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,13 @@ Apply when writing `.webp` (lossless WebP rendered via GPU rasterizer). --resolution Output resolution, e.g. 1920x1080. Default: 1280x720 (pinhole) or 2048x1024 (equirect) --near Near clip distance. Default: 0.2 (matches reference 3DGS) --background Background color in [0,1]. Default: 0,0,0,1 + --f-stop Aperture as a photographic f-stop (e.g. 2.8, 5.6, 11). Enables defocus blur; + smaller = more blur. Pinhole only. Default: disabled (no defocus). + --focus-distance Camera-space Z of the focus plane (world units). Default: distance to --look-at. + Pinhole only; only meaningful with --f-stop. + --sensor-size Vertical sensor height in world units. Gives --f-stop a physical meaning. + Default: 0.024 (35mm full-frame, world units = meters). Scale to your world: + world unit = decimeter → 0.24, world unit = millimeter → 24. ``` ## Examples @@ -392,6 +399,13 @@ splat-transform input.ply view.webp \ # Transparent background splat-transform input.ply view.webp --background 0,0,0,0 +# Defocus blur (focus on look-at, f/2.8 aperture) +splat-transform input.ply view.webp --f-stop 2.8 + +# Defocus with explicit focus distance and a smaller world scale +splat-transform input.ply view.webp \ + --f-stop 2.8 --focus-distance 3 --sensor-size 0.1 + # 360° equirectangular panorama from camera position splat-transform input.ply pano.webp \ --projection equirect --camera 0,1,0 --look-at 0,1,1 diff --git a/src/cli/index.ts b/src/cli/index.ts index ada9bc8..00d8ec0 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -147,6 +147,9 @@ const cliOptionsConfig = { 'resolution': { type: 'string' }, 'near': { type: 'string' }, 'background': { type: 'string' }, + 'f-stop': { type: 'string' }, + 'focus-distance': { type: 'string' }, + 'sensor-size': { type: 'string' }, // per-file options translate: { type: 'string', short: 't', multiple: true }, @@ -407,6 +410,18 @@ const parseArguments = async () => { renderHeight = parseInteger(m[2]); } const renderNear = v.near !== undefined ? parseNumber(v.near, 0) : undefined; + const renderFStop = v['f-stop'] !== undefined ? parseNumber(v['f-stop'], 0) : undefined; + if (renderFStop !== undefined && renderFStop <= 0) { + throw new Error(`Invalid --f-stop value: ${v['f-stop']}. Must be > 0.`); + } + const renderFocusDistance = v['focus-distance'] !== undefined ? parseNumber(v['focus-distance'], 0) : undefined; + if (renderFocusDistance !== undefined && renderFocusDistance <= 0) { + throw new Error(`Invalid --focus-distance value: ${v['focus-distance']}. Must be > 0.`); + } + const renderSensorSize = v['sensor-size'] !== undefined ? parseNumber(v['sensor-size'], 0) : undefined; + if (renderSensorSize !== undefined && renderSensorSize <= 0) { + throw new Error(`Invalid --sensor-size value: ${v['sensor-size']}. Must be > 0.`); + } let renderBackground: { r: number; g: number; b: number; a: number } | undefined; if (v.background !== undefined) { const parts = v.background.split(',').map((p: string) => parseNumber(p.trim())); @@ -455,7 +470,10 @@ const parseArguments = async () => { renderWidth, renderHeight, renderNear, - renderBackground + renderBackground, + renderFStop, + renderFocusDistance, + renderSensorSize }; for (const t of tokens) { @@ -746,6 +764,13 @@ IMAGE OUTPUT (.webp) — lossless WebP rendered via GPU rasterizer --resolution Output resolution, e.g. 1920x1080. Default: 1280x720 (pinhole) or 2048x1024 (equirect) --near Near clip distance. Default: 0.2 (matches reference 3DGS) --background Background color in [0,1]. Default: 0,0,0,1 + --f-stop Aperture as a photographic f-stop (e.g. 2.8, 5.6, 11). Enables defocus blur; + smaller = more blur. Pinhole only. Default: disabled (no defocus). + --focus-distance Camera-space Z of the focus plane (world units). Default: distance to --look-at. + Pinhole only; only meaningful with --f-stop. + --sensor-size Vertical sensor height in world units. Gives --f-stop a physical meaning. + Default: 0.024 (35mm full-frame, world units = meters). Scale to your world: + world unit = decimeter → 0.24, world unit = millimeter → 24. EXAMPLES # Convert formats diff --git a/src/lib/gpu/gpu-splat-rasterizer.ts b/src/lib/gpu/gpu-splat-rasterizer.ts index 43138c7..6cf9a87 100644 --- a/src/lib/gpu/gpu-splat-rasterizer.ts +++ b/src/lib/gpu/gpu-splat-rasterizer.ts @@ -111,6 +111,18 @@ interface SplatRasterizerOptions { eyeX: number; eyeY: number; eyeZ: number; /** Focal lengths in pixel units. */ focalX: number; focalY: number; + /** + * Camera-space Z of the focus plane, world units. Pinhole-only; + * unused when `projection === 'equirect'`. + */ + focusDistance: number; + /** + * DoF strength as a pixel-space scalar: the CoC radius in pixels when + * `|1 − focusDistance/cz| = 1`. `0` disables defocus. The writer + * derives this from `--f-stop` + `--sensor-size` using the thin-lens + * CoC formula. Pinhole-only. + */ + apertureScale: number; /** RGBA background, each channel in [0, 1]. */ bgR: number; bgG: number; bgB: number; bgA: number; } @@ -559,6 +571,9 @@ class GpuSplatRasterizer { c.setParameter('_p3', 0); c.setParameter('focalX', o.focalX); c.setParameter('focalY', o.focalY); c.setParameter('near', o.near); c.setParameter('_p4', 0); + c.setParameter('focusDistance', o.focusDistance); + c.setParameter('apertureScale', o.apertureScale); + c.setParameter('_p5', 0); c.setParameter('_p6', 0); c.setParameter('imageWidth', o.imageWidth); c.setParameter('imageHeight', o.imageHeight); c.setParameter('splatStride', this.inputStride); // chunkSize set per-dispatch diff --git a/src/lib/gpu/shaders/chunks/constants.ts b/src/lib/gpu/shaders/chunks/constants.ts index 86fa041..966a27d 100644 --- a/src/lib/gpu/shaders/chunks/constants.ts +++ b/src/lib/gpu/shaders/chunks/constants.ts @@ -1,6 +1,7 @@ import { AA_DILATION_COV, DISCRIMINANT_FLOOR, + GAUSSIAN_FLOOR, JACOBIAN_LIMIT_FACTOR, MIN_ALPHA, MIN_TRANSMITTANCE, @@ -42,6 +43,7 @@ const wgslF32 = (n: number): string => { const constantsChunk = /* wgsl */` const TILE_SIZE: u32 = ${TILE_SIZE}u; const SIGMA_CUTOFF: f32 = ${wgslF32(SIGMA_CUTOFF)}; +const GAUSSIAN_FLOOR: f32 = ${wgslF32(GAUSSIAN_FLOOR)}; const AA_DILATION_COV: f32 = ${wgslF32(AA_DILATION_COV)}; const DISCRIMINANT_FLOOR: f32 = ${wgslF32(DISCRIMINANT_FLOOR)}; const JACOBIAN_LIMIT_FACTOR: f32 = ${wgslF32(JACOBIAN_LIMIT_FACTOR)}; diff --git a/src/lib/gpu/shaders/project.ts b/src/lib/gpu/shaders/project.ts index ba250fe..fc4748e 100644 --- a/src/lib/gpu/shaders/project.ts +++ b/src/lib/gpu/shaders/project.ts @@ -97,6 +97,17 @@ fn main(@builtin(global_invocation_id) gid: vec3) { cov00 = cov00 + AA_DILATION_COV; cov11 = cov11 + AA_DILATION_COV; +#ifndef PROJECTION_EQUIRECT + // Defocus (DoF), pinhole only. Capture detPreDoF before dilating so the + // alpha rescale below conserves integrated energy — without it, + // defocused foreground splats over-occlude what is behind them. + let detPreDoF = cov00 * cov11 - cov01 * cov01; + let coc = uniforms.apertureScale * abs(1.0 - uniforms.focusDistance / cz); + let cocVar = coc * coc; + cov00 = cov00 + cocVar; + cov11 = cov11 + cocVar; +#endif + let det = cov00 * cov11 - cov01 * cov01; if (det <= 0.0) { writeInvalid(i); return; } @@ -168,7 +179,15 @@ fn main(@builtin(global_invocation_id) gid: vec3) { let colG = max(0.0, cG + 0.5); let colB = max(0.0, cB + 0.5); - let alpha = (1.0 / (1.0 + exp(-opacity))) * radiusFade; +#ifndef PROJECTION_EQUIRECT + // Energy-preserving alpha rescale for DoF. When apertureScale == 0, + // detPreDoF == det so dofAlphaScale == 1 (no-op). + let dofAlphaScale = sqrt(max(0.0, detPreDoF) / det); +#else + let dofAlphaScale = 1.0; +#endif + + let alpha = (1.0 / (1.0 + exp(-opacity))) * radiusFade * dofAlphaScale; projected[i * 3u + 0u] = vec4(screenX, screenY, radius, 0.0); projected[i * 3u + 1u] = vec4(covInvA, covInvB, covInvC, alpha); diff --git a/src/lib/gpu/shaders/rasterize-binned.ts b/src/lib/gpu/shaders/rasterize-binned.ts index acc86cc..83e84f7 100644 --- a/src/lib/gpu/shaders/rasterize-binned.ts +++ b/src/lib/gpu/shaders/rasterize-binned.ts @@ -83,7 +83,11 @@ fn main( let v1 = projected[splatIdx * 3u + 1u]; let power = -0.5 * (v1.x * dx * dx + 2.0 * v1.y * dx * dy + v1.z * dy * dy); if (power > 0.0) { continue; } - let alpha = min(OPACITY_CAP, v1.w * exp(power)); + // Subtract GAUSSIAN_FLOOR so each splat's alpha reaches 0 exactly + // at the 3σ truncation radius instead of clipping at ~1.1% — + // eliminates faint ring artifacts at splat edges. Matches the + // PlayCanvas engine. + let alpha = min(OPACITY_CAP, v1.w * max(0.0, exp(power) - GAUSSIAN_FLOOR)); if (alpha < MIN_ALPHA) { continue; } let weight = T * alpha; let v2 = projected[splatIdx * 3u + 2u]; diff --git a/src/lib/gpu/shaders/uniforms.ts b/src/lib/gpu/shaders/uniforms.ts index b6b5897..94694dd 100644 --- a/src/lib/gpu/shaders/uniforms.ts +++ b/src/lib/gpu/shaders/uniforms.ts @@ -26,6 +26,7 @@ struct Uniforms { forwardX: f32, forwardY: f32, forwardZ: f32, _p2: f32, eyeX: f32, eyeY: f32, eyeZ: f32, _p3: f32, focalX: f32, focalY: f32, near: f32, _p4: f32, + focusDistance: f32, apertureScale: f32, _p5: f32, _p6: f32, imageWidth: u32, imageHeight: u32, splatStride: u32, chunkSize: u32, groupPixelMinX: u32, groupPixelMinY: u32, groupPixelMaxX: u32, groupPixelMaxY: u32, groupTilesX: u32, groupTilesY: u32, groupPixelOriginX: u32, groupPixelOriginY: u32, @@ -64,6 +65,10 @@ const uniformFormatEntries = (): UniformFormat[] => [ new UniformFormat('focalY', UNIFORMTYPE_FLOAT), new UniformFormat('near', UNIFORMTYPE_FLOAT), new UniformFormat('_p4', UNIFORMTYPE_FLOAT), + new UniformFormat('focusDistance', UNIFORMTYPE_FLOAT), + new UniformFormat('apertureScale', UNIFORMTYPE_FLOAT), + new UniformFormat('_p5', UNIFORMTYPE_FLOAT), + new UniformFormat('_p6', UNIFORMTYPE_FLOAT), new UniformFormat('imageWidth', UNIFORMTYPE_UINT), new UniformFormat('imageHeight', UNIFORMTYPE_UINT), new UniformFormat('splatStride', UNIFORMTYPE_UINT), diff --git a/src/lib/render/camera.ts b/src/lib/render/camera.ts index 684d4b7..8068356 100644 --- a/src/lib/render/camera.ts +++ b/src/lib/render/camera.ts @@ -50,6 +50,18 @@ type RenderCamera = { height: number; /** Near clipping distance in world units. For pinhole, splats with `cz <= near` are culled; for equirect, splats with radial `r <= near`. */ near: number; + /** + * Camera-space Z of the focus plane in world units. Pinhole only; + * ignored for equirect. Optional — only meaningful when + * `apertureScale > 0`. + */ + focusDistance?: number; + /** + * DoF strength as a pixel-space scalar: the CoC radius in pixels + * when `|1 − focusDistance/cz| = 1`. Pinhole only; ignored for + * equirect. Default `0` disables defocus. + */ + apertureScale?: number; }; /** diff --git a/src/lib/render/config.ts b/src/lib/render/config.ts index 07a469b..c43cae3 100644 --- a/src/lib/render/config.ts +++ b/src/lib/render/config.ts @@ -72,6 +72,15 @@ export const DISCRIMINANT_FLOOR = 0.1; */ export const SIGMA_CUTOFF = 3.0; +/** + * Value of the unit gaussian at the truncation radius. Subtracted from + * `exp(power)` in the rasterizer so each splat's alpha reaches exactly + * 0 at `SIGMA_CUTOFF · σ` instead of clipping at ≈ 1.1% (which would + * leave a faint ring at the splat boundary). Matches the PlayCanvas + * engine's edge compensation. + */ +export const GAUSSIAN_FLOOR = Math.exp(-0.5 * SIGMA_CUTOFF * SIGMA_CUTOFF); + /** * Floor on the far-plane distance, expressed as a multiple of the near * plane. If every scene-AABB corner sits behind the camera the diff --git a/src/lib/render/raster-pass.ts b/src/lib/render/raster-pass.ts index 5c532c3..20fce39 100644 --- a/src/lib/render/raster-pass.ts +++ b/src/lib/render/raster-pass.ts @@ -375,6 +375,8 @@ const renderRasterPass = async ( eyeZ: basis.eye.z, focalX: basis.focalX, focalY: basis.focalY, + focusDistance: camera.focusDistance ?? 0, + apertureScale: camera.apertureScale ?? 0, bgR: background.r, bgG: background.g, bgB: background.b, diff --git a/src/lib/types.ts b/src/lib/types.ts index 03bb3bf..5297bcb 100644 --- a/src/lib/types.ts +++ b/src/lib/types.ts @@ -81,6 +81,28 @@ type Options = { /** RGBA background (each channel in [0, 1]) for image output. Default: (0, 0, 0, 1). */ renderBackground?: { r: number; g: number; b: number; a: number }; + + /** + * Aperture as a photographic f-stop (e.g. 2.8, 5.6, 11) for image + * output. Enables defocus blur / depth-of-field: smaller numbers = + * stronger blur. Defaults to disabled. Pinhole projection only. + */ + renderFStop?: number; + + /** + * Camera-space Z of the focus plane in world units for image output. + * Defaults to the distance from the camera to the look-at point when + * `renderFStop` is set. No effect without `renderFStop`. Pinhole + * projection only. + */ + renderFocusDistance?: number; + + /** + * Vertical sensor height in world units. Calibrates `renderFStop` + * to your world scale. Default `0.024` (35mm full-frame in meters). + * No effect without `renderFStop`. Pinhole projection only. + */ + renderSensorSize?: number; }; /** diff --git a/src/lib/write.ts b/src/lib/write.ts index 3927d8d..de7bdc2 100644 --- a/src/lib/write.ts +++ b/src/lib/write.ts @@ -198,6 +198,9 @@ const writeFile = async (writeOptions: WriteOptions, fs: FileSystem) => { height: options.renderHeight, near: options.renderNear, background: options.renderBackground, + fStop: options.renderFStop, + focusDistance: options.renderFocusDistance, + sensorSize: options.renderSensorSize, createDevice }, fs); break; diff --git a/src/lib/writers/write-image.ts b/src/lib/writers/write-image.ts index f891359..f1e9bdb 100644 --- a/src/lib/writers/write-image.ts +++ b/src/lib/writers/write-image.ts @@ -57,6 +57,32 @@ type WriteImageOptions = { /** RGBA background, each channel in [0, 1]. Default: (0, 0, 0, 1). */ background?: { r: number; g: number; b: number; a: number }; + /** + * Aperture as a photographic f-stop (e.g. 2.8, 5.6, 11). Enables + * defocus blur / depth-of-field: smaller numbers = stronger blur. + * Defaults to disabled. Pinhole only — passing this with + * `projection: 'equirect'` is an error. + */ + fStop?: number; + + /** + * Camera-space Z of the focus plane in world units. Defaults to the + * distance from `cameraPosition` to `lookAt` along the forward axis + * (i.e. focus on the look-at point) when `fStop` is set. Has no + * effect without `fStop`. Pinhole only — passing this with + * `projection: 'equirect'` is an error. + */ + focusDistance?: number; + + /** + * Vertical sensor height in world units, used to give `fStop` a + * defined physical meaning. Default `0.024` matches a 35mm + * full-frame sensor when world units are meters. Scale this with + * your scene's units (e.g. world unit = decimeter → 0.24, world + * unit = millimeter → 24). Has no effect without `fStop`. + */ + sensorSize?: number; + /** Function returning a GraphicsDevice. Required — rasterization runs on GPU. */ createDevice?: DeviceCreator; }; @@ -89,6 +115,9 @@ const writeImage = async (options: WriteImageOptions, fs: FileSystem): Promise= 180) { throw new Error(`Invalid fov: ${fov}. Must be in (0, 180).`); } + if (fStop !== undefined && !(fStop > 0)) { + throw new Error(`Invalid f-stop: ${fStop}. Must be > 0.`); + } + if (focusDistance !== undefined && !(focusDistance > 0)) { + throw new Error(`Invalid focus-distance: ${focusDistance}. Must be > 0.`); + } + if (!(sensorSize > 0)) { + throw new Error(`Invalid sensor-size: ${sensorSize}. Must be > 0.`); + } } const g = logger.group('Render'); + const fovY = projection === 'equirect' ? 0 : (fov! * Math.PI) / 180; + + // Resolve DoF for pinhole only. The project shader consumes a single + // pre-baked scalar `apertureScale` (pixel CoC per unit relative + // defocus) and the focus distance. Physical CoC for a thin lens is: + // + // CoC_pixels = (focal_real² / (N · focus)) × |1 − focus/cz| + // × image_height / sensor_height + // + // where focal_real is the real lens focal length implied by + // `fovY` and `sensorSize`. Apply image_height / sensor_height to + // convert physical CoC (sensor units) to pixels. Defaulting + // `sensorSize` to 0.024 makes f-stops behave like a 35mm + // full-frame camera when world units are meters; scale to suit + // non-meter scenes. Focus defaults to the look-at point. + let resolvedFocusDistance = 0; + let resolvedApertureScale = 0; + if (projection !== 'equirect' && fStop !== undefined) { + if (focusDistance !== undefined) { + resolvedFocusDistance = focusDistance; + } else { + const fwdX = lookAt.x - cameraPosition.x; + const fwdY = lookAt.y - cameraPosition.y; + const fwdZ = lookAt.z - cameraPosition.z; + const fwdLen = Math.hypot(fwdX, fwdY, fwdZ); + if (fwdLen === 0) { + throw new Error('writeImage: cannot derive default --focus-distance because --camera equals --look-at.'); + } + // forward · (lookAt - cameraPosition) where forward is unit + // = fwdLen (the basis forward is the same vector normalized). + resolvedFocusDistance = fwdLen; + } + const focalRealWorld = (sensorSize / 2) / Math.tan(fovY * 0.5); + const focalYPx = (height / 2) / Math.tan(fovY * 0.5); + resolvedApertureScale = focalRealWorld * focalYPx / (fStop * resolvedFocusDistance); + } + const camera: RenderCamera = { projection, position: new Vec3(cameraPosition.x, cameraPosition.y, cameraPosition.z), target: new Vec3(lookAt.x, lookAt.y, lookAt.z), up: new Vec3(up.x, up.y, up.z), - fovY: projection === 'equirect' ? 0 : (fov! * Math.PI) / 180, + fovY, width, height, - near + near, + focusDistance: resolvedFocusDistance, + apertureScale: resolvedApertureScale }; const device = await createDevice(); @@ -144,6 +230,8 @@ const writeImage = async (options: WriteImageOptions, fs: FileSystem): Promise 0) { + logger.info(`${width}x${height} fov ${fov}° f/${fStop} focus ${resolvedFocusDistance.toFixed(3)} sensor ${sensorSize}`); } else { logger.info(`${width}x${height} fov ${fov}°`); } diff --git a/test/fixtures/golden-render/mid.webp b/test/fixtures/golden-render/mid.webp index 61f08d5..af09db3 100644 Binary files a/test/fixtures/golden-render/mid.webp and b/test/fixtures/golden-render/mid.webp differ diff --git a/test/fixtures/golden-render/tiny-dof.webp b/test/fixtures/golden-render/tiny-dof.webp new file mode 100644 index 0000000..7681abd Binary files /dev/null and b/test/fixtures/golden-render/tiny-dof.webp differ diff --git a/test/fixtures/golden-render/tiny.webp b/test/fixtures/golden-render/tiny.webp index 260ada1..66168c5 100644 Binary files a/test/fixtures/golden-render/tiny.webp and b/test/fixtures/golden-render/tiny.webp differ diff --git a/test/render-golden.cases.mjs b/test/render-golden.cases.mjs index 7e18ec7..fb486a8 100644 --- a/test/render-golden.cases.mjs +++ b/test/render-golden.cases.mjs @@ -42,6 +42,27 @@ const CASES = [ '--resolution', '640x360' ], goldenPath: 'fixtures/golden-render/mid.webp' + }, + { + // 'tiny' scene + DoF. Exercises the DoF code paths end-to-end: + // CoC uniform plumbing, per-splat covariance dilation, energy- + // preserving alpha rescale, and default focus-distance derivation + // (look-at distance, since --focus-distance is omitted). + // sensor-size scales f-stop into the unitless world the synthetic + // scene lives in so the blur is clearly visible (~3 px CoC across + // the grid's depth range). + name: 'tiny-dof', + args: [ + 'test/fixtures/generator.mjs', + '-p', 'width=20,height=20,spacing=1.0,scale=0.1', + '--camera', '0,5,-8', + '--look-at', '0,0,0', + '--fov', '60', + '--resolution', '320x240', + '--f-stop', '2.8', + '--sensor-size', '0.5' + ], + goldenPath: 'fixtures/golden-render/tiny-dof.webp' } ];