diff --git a/README.md b/README.md index ad15fda..9f162ae 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,10 @@ svgsmith convert path/to/image.png --out out.svg --report json fringes, so illustrations come out clean and editable without losing their character or ballooning in size. `--detail high` additionally preserves subtle painterly brush-grain for heavily textured art. +- **Crisp, high-resolution lines** — low-resolution flat cartoon art (e.g. a 640px JPEG) is + auto-traced on a supersampled grid, so outlines and curves come out smooth instead of + staircased on the native pixel grid, while same-color fragments are merged back so the path + count stays economical. Applied only to that class; `--hires` forces it on any color input. - **Editable output** — instead of one monolithic `<path>`, output is grouped into `<g>` layers with simplified paths and a consolidated color palette. - **Self-verifying** — converts, re-rasterizes, diffs against the original (SSIM), and @@ -125,6 +129,7 @@ svgsmith convert input.png \ | `--editable` / `--no-editable` | on | Editable grouped/simplified SVG, or the raw traced output. | | `--smooth` / `--no-smooth` | on | Curve-refit color contours into smooth, sparse Béziers (Schneider least-squares). | | `--detail {high,normal,clean,poster}` | `normal` | Color detail dial. `high` = maximum detail, preserving fine texture / painterly brush-grain; `normal` = faithful but economical (keeps deliberate detail, drops anti-alias fringes); `clean` = edge-preserving cleanup (less noise/grain); `poster` = bold flat graphic with few colors. | +| `--hires` | off | Force the high-resolution supersampled trace (crisp, smooth lines) on any color input. Low-res flat illustrations get it automatically; `--hires` forces it on textured/large art too (more paths/bytes, slower). | | `--solid-background` | off | Isolate the subject and repaint the background as one clean solid color — removes texture/grain/specks while keeping subject detail. 
| | `--background COLOR` | off | Like `--solid-background`, but repaint the detected background to a **specific** color (`#RRGGBB` or named, e.g. `white`). `auto` = the detected median. | | `--transparent-background` | off | **Remove** the background instead of repainting it — the edge-connected background is cut, leaving a **transparent** SVG. The subject is kept even where it shares the background color (color mode). | diff --git a/src/svgsmith/__init__.py b/src/svgsmith/__init__.py index 6ec9ab7..2476048 100644 --- a/src/svgsmith/__init__.py +++ b/src/svgsmith/__init__.py @@ -1,3 +1,3 @@ """svgsmith — convert raster images into clean, editable SVG.""" -__version__ = "0.5.1" +__version__ = "0.6.0" diff --git a/src/svgsmith/cli.py b/src/svgsmith/cli.py index d91e9a4..7fe6981 100644 --- a/src/svgsmith/cli.py +++ b/src/svgsmith/cli.py @@ -58,6 +58,7 @@ def _convert(args: argparse.Namespace) -> int: transparent_background=args.transparent_background, flatten_shading=args.flatten_shading, detail=args.detail, + hires=args.hires, out=args.out, ) @@ -206,6 +207,17 @@ def build_parser() -> argparse.ArgumentParser: "few colors." ), ) + convert.add_argument( + "--hires", + action="store_true", + default=False, + help=( + "Force the high-resolution trace (supersample the mask) for crisp, smooth " + "lines on ANY color input. Low-resolution flat illustrations get this " + "automatically; --hires forces it on textured or already-large art too " + "(more paths/bytes, slower)." 
+ ), + ) convert.add_argument( "--solid-background", action="store_true", diff --git a/src/svgsmith/pipeline.py b/src/svgsmith/pipeline.py index 326e0bb..78c87cb 100644 --- a/src/svgsmith/pipeline.py +++ b/src/svgsmith/pipeline.py @@ -15,7 +15,12 @@ from svgsmith.classify import Classification, classify from svgsmith.engines.base import ImageInput, load_image -from svgsmith.postprocess import drop_background_paths, snap_background_layer, snap_dark_fills +from svgsmith.postprocess import ( + drop_background_paths, + global_same_fill_merge, + snap_background_layer, + snap_dark_fills, +) from svgsmith.preprocess import PreprocessOptions, _edge_flood_fill_mask, preprocess from svgsmith.report import Report, svg_stats from svgsmith.smooth import smooth_svg @@ -47,6 +52,13 @@ # anchor snaps each region to one clean color and keeps the outline pure black. # K scales with detail so a genuinely rich illustration is not crushed at "high". _TRACE_RESOLUTION = 2048 +# Auto/--hires supersample target long-edge (line-quality Phase 3): the loop-validated minimum +# that lifts a sub-768px input off the native pixel staircase (subpixel grid + SSIM gain) without +# the heavier cost of the full reference-grid factor. Paired with an uncapped region merge so the +# flat-economy levers can claw the high-res path blow-up back toward reference counts. +_SUPERSAMPLE_AUTO_RES = 1536 +_SUPERSAMPLE_REGION_MAX_PX = 20000 # uncap the region merge at high res (native cap is 2000) +_SAME_FILL_MERGE_SSIM_DROP = 0.01 # gate: keep the same-fill merge only within this SSIM cost # Only low-resolution color inputs get the supersample + k-means treatment; # already-large clean art traces smoothly on the proven path and upscaling it # just bloats node count (a 1024px PNG shiba regresses 95→219 paths if upscaled). @@ -303,6 +315,13 @@ class ConvertOptions: # resolution) so thick outline blobs become thin crescents (small dark detail is protected). 
illustration_supersample: int = 0 illustration_dark_thin: int = 0 + # Resolution lever (line-quality Phase 3): trace a supersampled mask so curves are fit on + # a fine grid (crisp, high-resolution lines) instead of the native pixel staircase. AUTO for + # flat low-resolution illustrations (the ``_supersample_candidate`` class — e.g. a 640px JPEG + # cartoon that traces staircased); ``hires`` FORCES it on any color input (textured/high-res + # too, where the same-fill economy may not apply so it just costs more paths). Gradients, + # photos and already-high-res inputs are untouched. + hires: bool = False out: str | None = None def __post_init__(self) -> None: @@ -455,7 +474,11 @@ def render( similarity = result.best_score if opts.smooth and opts.editable and is_color: reference = load_image(image, "RGB") - smoothed = smooth_svg(svg) + # Lossless supersample byte lever: when the mask was traced above native + # resolution the viewBox is N× the native grid, so default .2f coordinates + # resolve far finer than the original pixels — pure byte bloat. smooth_svg + # auto-drops decimals by log10(factor) so granularity tracks the native grid. + smoothed = smooth_svg(svg, native_long_edge=max(reference.size)) smoothed_score = score(reference, rasterize(smoothed, reference.size)) keep = smoothed_score >= result.best_score - _SMOOTH_SSIM_TOLERANCE # Wobble-relief escape clause: raw SSIM rewards the antialiased staircase, so a @@ -500,23 +523,39 @@ def render( # Other levels keep the flatten (the loop-validated economical default). if opts.detail == "high": cov_pre = replace(cov_pre, flatten=False) - # Illustration-geometry knobs (experimental, loop-tuned): supersample the flat-colour - # mask for round/uniform scallop boundaries, and/or thin the dark linework into crescents. - # Applied ONLY to the outlined low-res illustration class and ONLY when requested, so the - # default path and gradients/photos are untouched. 
- want_geometry = opts.illustration_supersample or opts.illustration_dark_thin - if want_geometry and _supersample_candidate(image): + # Resolution lever (line-quality Phase 3): supersample the flat-colour mask so curves are + # fit on a fine grid (crisp, high-resolution lines) instead of the native pixel staircase. + # AUTO for the flat low-res illustration class (``_supersample_candidate`` — e.g. a 640px + # JPEG cartoon that traces staircased); FORCED by ``--hires``; or set explicitly by the + # experimental illustration knob. When supersampling, uncap the region merge (the 2000px + # cap throttles consolidation at high res) so the flat-economy levers can claw the path + # blow-up back down. Gradients, photos and already-high-res inputs never enter here. + ss_res = opts.illustration_supersample or ( + _SUPERSAMPLE_AUTO_RES if (_supersample_candidate(image) or opts.hires) else 0 + ) + did_supersample = bool(ss_res) + if did_supersample or opts.illustration_dark_thin: cov_pre = replace( cov_pre, - trace_resolution=opts.illustration_supersample or cov_pre.trace_resolution, + trace_resolution=ss_res or cov_pre.trace_resolution, coverage_dark_thin=opts.illustration_dark_thin, + coverage_region_max_px=( + _SUPERSAMPLE_REGION_MAX_PX + if did_supersample + else cov_pre.coverage_region_max_px + ), ) cov_class = classification._replace(preset="continuous") # Dark-outline black snap (#lever-C): collapse scattered near-black tints into one clean # #000000 outline layer. Only engage when there is genuine near-black mass (so no-black art # is untouched and pays no extra render); then SSIM-guard against the no-snap trace so a - # dark-but-not-black-detail image can never regress. - snap_black = _COVERAGE_BLACK_SNAP_DE > 0.0 and _has_black_outline(image) + # dark-but-not-black-detail image can never regress. 
SKIPPED when supersampling: at high res + # the sharper black core makes the colour tracer carve MORE edge tints, so the cleaner-guard + # rejects the snap anyway — running it just pays for a second full-res trace for nothing + # (verified byte-identical output at half the time on the flat low-res class). + snap_black = ( + _COVERAGE_BLACK_SNAP_DE > 0.0 and not did_supersample and _has_black_outline(image) + ) base_svg, base_sim, base_iters = render( cov_pre, cov_class, palette_threshold=0.0, max_iters=1 ) @@ -548,6 +587,17 @@ def render( svg, similarity, iterations = base_svg, base_sim, base_iters else: svg, similarity, iterations = base_svg, base_sim, base_iters + # Flat-region economy (Phase 3): supersampling fragments a clean solid region into many + # same-fill paths; collapse each colour's fragments into one compound path. SSIM-gated — + # the hoist reorders paint and is lossless only on flat tiling art, so textured/overlapping + # content fails the gate and keeps the un-merged trace (where the supersample cost stays). + if did_supersample: + merged_svg = global_same_fill_merge(svg) + if merged_svg != svg: + ref_img = load_image(image, "RGB") + merged_sim = score(ref_img, rasterize(merged_svg, ref_img.size)) + if merged_sim >= similarity - _SAME_FILL_MERGE_SSIM_DROP: + svg, similarity = merged_svg, merged_sim # Path cap = misgated-photo blowup guard. 
At --detail high the user opted into # max fidelity, so a high count is INTENDED for legitimately grainy/painterly art # (the reference traces such inputs into thousands of micro-tiles) — raise the cap diff --git a/src/svgsmith/postprocess.py b/src/svgsmith/postprocess.py index 90675cc..2cf2a62 100644 --- a/src/svgsmith/postprocess.py +++ b/src/svgsmith/postprocess.py @@ -772,6 +772,43 @@ def _swap(match: re.Match[str]) -> str: return re.sub(r'fill="(#[0-9a-fA-F]{3,6})"', _swap, svg_str) +def global_same_fill_merge(svg_str: str, precision: int = 2) -> str: + """Hoist every path of a given fill into ONE ``<path>`` at that fill's top-most + paint index. + + Supersampling a flat-colour region fragments it into many same-fill paths (a clean + solid area becomes a spray of tiles), inflating the path/byte count for zero fidelity + gain. This collapses each colour's fragments back into a single compound path. It is + LOSSLESS only when same-fill regions tile without a *different* fill painting between + and over them (true for flat/poster art) — hoisting reorders paint, so the CALLER + MUST SSIM-verify the result and fall back on a drop. Bails (returns the input) when any + path carries a non-translate transform (baking would break a scaled/flipped shape) or + when every fill is already unique (nothing to merge). 
+ """ + root = ET.fromstring(svg_str) + paths = _collect_paths(root) + if not paths: + return svg_str + groups: dict[str, dict] = {} + for idx, path in enumerate(paths): + if not _is_translate_only(path["transform"]): + return svg_str + fill = path["fill"] or "none" + group = groups.get(fill) + if group is None: + group = groups[fill] = {"max_idx": idx, "subs": []} + group["max_idx"] = idx + group["subs"].extend(_baked_subpaths(path)) + if len(groups) == len(paths): + return svg_str # no fill repeats — nothing to collapse + merged: list[dict] = [] + for fill, group in sorted(groups.items(), key=lambda kv: kv[1]["max_idx"]): + d = _emit_d(group["subs"], precision) + if d: + merged.append({"fill": None if fill == "none" else fill, "d": d, "transform": ""}) + return _build_svg(root, merged, group=False, precision=precision, merge_fill_runs=False) + + def svg_bbox(svg_str: str, samples: int = 18) -> tuple[float, float, float, float] | None: """Overall geometry bounding box ``(minx, miny, maxx, maxy)``, or None.""" root = ET.fromstring(svg_str) diff --git a/src/svgsmith/smooth.py b/src/svgsmith/smooth.py index 07b3fd9..a9f5831 100644 --- a/src/svgsmith/smooth.py +++ b/src/svgsmith/smooth.py @@ -381,6 +381,47 @@ def _len(value: str | None) -> float: return math.hypot(_len(root.get("width")), _len(root.get("height"))) or 1000.0 +def _viewbox_long_edge(root: ET.Element) -> float: + vb = root.get("viewBox") + if vb: + parts = [float(v) for v in vb.replace(",", " ").split()] + if len(parts) == 4: + return max(parts[2], parts[3]) + return 0.0 + + +def _resolution_precision( + root: ET.Element, native_long_edge: int, precision: int | None +) -> int: + """Choose lossless output decimals for the (possibly supersampled) viewBox. + + The supersample factor is ``viewBox_long_edge / native_long_edge``, so 1 viewBox unit + spans ``1/factor`` of a *native* pixel. 
A coordinate rounded to ``10**-d`` viewBox units + therefore moves at most ``10**-d / factor`` native pixels; keeping that step ≤ ~½ a + native pixel is visually lossless. Solving ``10**-d / factor <= 0.5`` gives + ``d >= log10(2 / factor)``, so the smallest lossless decimal count is + ``max(0, ceil(log10(2 / factor)))`` — for the 2-4× supersample factors this is 0, but + we clamp to ``min(base, …)`` so the lever only ever *removes* decimals (saves bytes), + never adds them. Empirically ``.1f`` is exactly lossless at 2.4-3× and ``.0f`` costs + ≤0.0005 SSIM; we keep one guard decimal over the theoretical floor by clamping to + ``base`` and letting the render-verified smooth gate catch any regression. + ``precision`` None starts from 2; an unknown native size returns ``precision``. + """ + base = 2 if precision is None else precision + if native_long_edge <= 0: + return base + long_edge = _viewbox_long_edge(root) + if long_edge <= native_long_edge: + return base + factor = long_edge / float(native_long_edge) + # Lossless floor is max(0, ceil(log10(2/factor))); we keep ONE guard decimal above it + # (so 2-4× lands .1f, the render-verified-lossless point, not the riskier .0f). + if factor <= 0: + return base + raw_floor = int(math.ceil(math.log10(2.0 / factor))) # may be negative at high factor + return max(0, min(base, raw_floor + 1)) + + def smooth_svg( svg: str, *, @@ -393,16 +434,27 @@ def smooth_svg( snap_deg: float = 10.0, max_drift_ratio: float = 0.012, samples: int = 6, - precision: int = 2, + precision: int | None = 2, + native_long_edge: int = 0, ) -> str: """Return ``svg`` with every path's geometry curve-refit for smooth, sparse Béziers. ``tol_ratio`` / ``min_perim_ratio`` / ``straight_tol_ratio`` are fractions of the viewBox diagonal, so behavior is independent of canvas size. ``tol_ratio`` is the Bézier fit error budget — larger means fewer, smoother curves. + + Coordinate precision is *resolution-aware* (the lossless supersample byte lever). 
+ ``precision`` decimals are emitted at native scale, but when the SVG was traced on a + supersampled mask its viewBox is N× the native pixel grid, so two ``.2f`` decimals + there resolve ~Nx finer than the original pixels ever could — pure byte bloat. When + ``native_long_edge`` is known we drop the decimal count by ``log10(supersample factor)`` + so the absolute coordinate granularity tracks the *native* grid, not the inflated one + (e.g. a 640px image traced at 2048 ≈ 3.2×, so ``.2f`` → ``.1f`` losslessly). Pass + ``precision=None`` to auto-pick purely from the factor. """ root = ET.fromstring(svg) diag = _diagonal(root) + precision = _resolution_precision(root, native_long_edge, precision) tol = (tol_ratio * diag) ** 2 # _max_error works in squared distance min_perim = min_perim_ratio * diag straight_tol = straight_tol_ratio * diag diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 0c7c5a7..bce177b 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -322,19 +322,29 @@ def test_lossy_denoise_is_gated_to_lossy_sources(tmp_path): assert np.array_equal(np.array(on), np.array(off)) # clean PNG untouched by the gate -def test_illustration_geometry_knobs_are_opt_in(tmp_path): - """Phase 0: ``illustration_supersample`` / ``illustration_dark_thin`` are OFF by default - (output unchanged) and only take effect when explicitly set on the illustration class.""" +def test_supersample_is_auto_for_flat_low_res_and_gated(tmp_path): + """Phase 3: the flat low-res illustration class auto-supersamples (crisp lines, off the + native pixel staircase) while large/already-high-res inputs stay native; ``--hires`` + forces it; the illustration knobs still validate.""" src = tmp_path / "c.png" - _gradient_with_black_bars().save(src) # low-res, rich-colour, mid-edge = illustration signature + _gradient_with_black_bars().save(src) # the _supersample_candidate flat-low-res signature + # AUTO: a flat low-res input now traces supersampled by default (viewBox > 
native 300). base, _ = convert(str(src), ConvertOptions(max_iters=1)) base_vb = max(float(v) for v in ET.fromstring(base).get("viewBox").split()) - assert base_vb == 300 # default: native resolution, no supersample - - sup, _ = convert(str(src), ConvertOptions(max_iters=1, illustration_supersample=2048)) - sup_vb = max(float(v) for v in ET.fromstring(sup).get("viewBox").split()) - assert sup_vb > 300 # the supersample knob traces at a larger internal resolution + assert base_vb > 300 + + # A large input is NOT a supersample candidate → stays native (no auto-supersample). + big = tmp_path / "big.png" + _gradient_with_black_bars(size=1100).save(big) + big_svg, _ = convert(str(big), ConvertOptions(max_iters=1)) + big_vb = max(float(v) for v in ET.fromstring(big_svg).get("viewBox").split()) + assert big_vb == 1100 + + # --hires FORCES supersample even on the large input. + forced, _ = convert(str(big), ConvertOptions(max_iters=1, hires=True)) + forced_vb = max(float(v) for v in ET.fromstring(forced).get("viewBox").split()) + assert forced_vb > 1100 # negative knob values fail fast with pytest.raises(ValueError): diff --git a/tests/test_postprocess.py b/tests/test_postprocess.py index 1a6b7ad..8f602dd 100644 --- a/tests/test_postprocess.py +++ b/tests/test_postprocess.py @@ -247,3 +247,34 @@ def test_snap_dark_fills_collapses_near_black_to_one_layer(): assert "#0a0a0a" not in fills and "#050505" not in fills # both darks merged # de<=0 disables the pass entirely (byte-identical) assert snap_dark_fills(svg, de=0.0) == svg + + +def test_global_same_fill_merge_collapses_fragmented_fills(): + """Phase 3 flat-economy: same-fill fragments hoist into one per fill (fewer + paths, same pixels); a non-repeating palette is returned unchanged (nothing to merge).""" + from svgsmith.postprocess import global_same_fill_merge + + svg = ( + f'' + '' + '' # same fill, fragmented + '' + "" + ) + out = global_same_fill_merge(svg) + root = ET.fromstring(out) + paths = [p for p in 
root.iter(f"{{{SVG_NS}}}path")] + fills = [p.get("fill") for p in paths] + assert len(paths) == 2 # the two reds collapsed into one, green stays + assert fills.count("#ff0000") == 1 and "#00ff00" in fills + red = next(p for p in paths if p.get("fill") == "#ff0000") + assert red.get("d").count("M") == 2 # both red subpaths preserved in the merged path + + # All-unique palette: nothing to merge → returned unchanged. + uniq = ( + f'' + '' + '' + "" + ) + assert global_same_fill_merge(uniq) == uniq diff --git a/tests/test_smooth.py b/tests/test_smooth.py index eecaa68..d39e9b0 100644 --- a/tests/test_smooth.py +++ b/tests/test_smooth.py @@ -1,10 +1,49 @@ """Tests for the curve-smoothing post-pass (Schneider fit + axis-snap).""" import math +import xml.etree.ElementTree as ET import numpy as np -from svgsmith.smooth import _axis_snap +from svgsmith.smooth import _axis_snap, _resolution_precision, smooth_svg + + +def _root(view_box: str) -> ET.Element: + return ET.fromstring( + f'' + ) + + +def test_resolution_precision_drops_decimals_for_supersampled_viewbox(): + # 640px native traced at 1536 (factor 2.4): .2f resolves ~2.4x finer than the + # native pixel grid, so the (one-guard-decimal) lossless decimal count is .1f. + assert _resolution_precision(_root("0 0 1536 1536"), 640, 2) == 1 + # 1024px native at 4096 (factor 4): still .1f with the guard decimal kept. + assert _resolution_precision(_root("0 0 4096 4096"), 1024, 2) == 1 + # Extreme 20x supersample (tiny pixel art): safe to drop to integer coords. + assert _resolution_precision(_root("0 0 2000 2000"), 100, 2) == 0 + + +def test_resolution_precision_noops_without_native_or_supersample(): + # Unknown native size -> keep the requested precision untouched. + assert _resolution_precision(_root("0 0 1536 1536"), 0, 2) == 2 + # Native-size trace (no supersample) -> no decimals dropped. 
+ assert _resolution_precision(_root("0 0 640 640"), 640, 2) == 2 + + +def test_resolution_precision_never_increases_precision(): + # The lever only ever saves bytes: it must not raise precision above the request. + assert _resolution_precision(_root("0 0 100 100"), 1000, 1) == 1 + + +def test_smooth_svg_native_long_edge_reduces_coordinate_decimals(): + svg = ( + '' + '' + ) + out = smooth_svg(svg, native_long_edge=640) + # No coordinate should carry two decimals once auto-precision picks .1f. + assert ".12" not in out and ".34" not in out def test_axis_snap_snaps_near_horizontal_and_keeps_length():