From 03b12bac52dc6bce4a85546933e3cc760434b176 Mon Sep 17 00:00:00 2001 From: Mason James Date: Sun, 3 May 2026 22:22:20 -0400 Subject: [PATCH 1/2] Fix WordPress import image variant URL rewrites --- .changeset/wp-import-size-variant-urls.md | 5 + .../import/wordpress/rewrite-url-helpers.ts | 196 ++++++++++++++++++ .../api/import/wordpress/rewrite-urls.ts | 185 +---------------- .../import/wordpress-rewrite-urls.test.ts | 100 +++++++++ 4 files changed, 309 insertions(+), 177 deletions(-) create mode 100644 .changeset/wp-import-size-variant-urls.md create mode 100644 packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts create mode 100644 packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts diff --git a/.changeset/wp-import-size-variant-urls.md b/.changeset/wp-import-size-variant-urls.md new file mode 100644 index 000000000..5e2baeed0 --- /dev/null +++ b/.changeset/wp-import-size-variant-urls.md @@ -0,0 +1,5 @@ +--- +"emdash": patch +--- + +Fixes WordPress media URL rewriting for imported image URLs that use generated size suffixes. diff --git a/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts b/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts new file mode 100644 index 000000000..a901ce253 --- /dev/null +++ b/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts @@ -0,0 +1,196 @@ +const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g; +const WORDPRESS_IMAGE_SIZE_SUFFIX = /-\d+x\d+(?=\.[^./?#]+$)/; +const BASE_URL_EXTENSION = /^(.+)(\.[^./?#]+)$/; + +/** + * Strip query parameters from a URL for base matching + */ +export function getBaseUrl(url: string): string { + try { + const parsed = new URL(url); + return `${parsed.origin}${parsed.pathname}`; + } catch { + // If URL parsing fails, try simple string split + return url.split("?")[0] || url; + } +} + +/** + * Build a map of base URLs to new URLs for flexible matching + */ +export function buildBaseUrlMap(urlMap: Record): Map { + const baseMap = new Map(); + for (const [oldUrl, newUrl] of Object.entries(urlMap)) { + const baseUrl = getBaseUrl(oldUrl); + baseMap.set(baseUrl, newUrl); + } + return baseMap; +} + +/** + * Find matching new URL for a given URL, checking exact, base, and WordPress image-size matches + */ +export function findMatchingUrl( + url: string, + exactMap: Record, + baseMap: Map, +): string | null { + if (exactMap[url]) { + return exactMap[url]; + } + + const baseUrl = getBaseUrl(url); + const baseMatch = baseMap.get(baseUrl); + if (baseMatch) { + return baseMatch; + } + + const wordPressImageMatch = baseMap.get(stripWordPressImageSizeSuffix(baseUrl)); + if (wordPressImageMatch) { + return wordPressImageMatch; + } + + return null; +} + +/** + * Portable Text block type (simplified for URL rewriting) + */ +interface PortableTextBlock { + _type: string; + _key?: string; + asset?: { + _type?: string; + _ref?: string; + url?: string; + }; + link?: string; + // For nested content like galleries + images?: PortableTextBlock[]; + columns?: Array<{ content?: PortableTextBlock[] }>; + [key: string]: unknown; +} + +/** + * Rewrite URLs in a Portable Text array, returning whether any changes were made + */ +export function rewritePortableTextUrls( + blocks: PortableTextBlock[], + exactMap: Record, + baseMap: Map, +): { changed: boolean; urlsRewritten: number } { + let changed = false; + let urlsRewritten = 0; + + for (const block of blocks) { + // Handle image blocks + if (block._type === "image" && block.asset?.url) { + const newUrl = findMatchingUrl(block.asset.url, exactMap, baseMap); + if (newUrl) { + block.asset.url = newUrl; + block.asset._ref = newUrl; // Also update the reference + changed = true; + urlsRewritten++; + } + } + + // Handle image link URLs (for linked images) + if (block._type === "image" && block.link) { + const newUrl = findMatchingUrl(block.link, exactMap, baseMap); + if (newUrl) { + block.link = newUrl; + changed = true; + urlsRewritten++; + } + } + + // Handle gallery blocks with nested images + if (block._type === "gallery" && Array.isArray(block.images)) { + const result = rewritePortableTextUrls(block.images, exactMap, baseMap); + if (result.changed) { + changed = true; + urlsRewritten += result.urlsRewritten; + } + } + + // Handle columns blocks with nested content + if (block._type === "columns" && Array.isArray(block.columns)) { + for (const column of block.columns) { + if (Array.isArray(column.content)) { + const result = rewritePortableTextUrls(column.content, exactMap, baseMap); + if (result.changed) { + changed = true; + urlsRewritten += result.urlsRewritten; + } + } + } + } + } + + return { changed, urlsRewritten }; +} + +/** + * Rewrite URLs in a string field using simple string replacement + */ +export function rewriteStringUrls( + value: string, + exactMap: Record, + baseMap: Map, +): { newValue: string; changed: boolean; urlsRewritten: number } { + let newValue = value; + let changed = false; + let urlsRewritten = 0; + + // Try exact matches first + for (const [oldUrl, newUrl] of Object.entries(exactMap)) { + if (newValue.includes(oldUrl)) { + newValue = newValue.split(oldUrl).join(newUrl); + changed = true; + urlsRewritten++; + } + } + + // For base URL matching in strings, we need to be more careful + // Only match if we find a URL that starts with the base + for (const [baseUrl, newUrl] of baseMap.entries()) { + // Look for the base URL followed by optional query string or end + const regex = buildBaseUrlMatchRegex(baseUrl); + const matches = newValue.match(regex); + if (matches) { + for (const match of matches) { + // Don't replace if we already have an exact match in the map + if (!exactMap[match]) { + newValue = newValue.split(match).join(newUrl); + changed = true; + urlsRewritten++; + } + } + } + } + + return { newValue, changed, urlsRewritten }; +} + +/** + * Escape special regex characters in a string + */ +function escapeRegExp(string: string): string { + return string.replace(REGEX_SPECIAL_CHARS, "\\$&"); +} + +function stripWordPressImageSizeSuffix(url: string): string { + return url.replace(WORDPRESS_IMAGE_SIZE_SUFFIX, ""); +} + +function buildBaseUrlMatchRegex(baseUrl: string): RegExp { + const extensionMatch = BASE_URL_EXTENSION.exec(baseUrl); + const basePattern = extensionMatch + ? `${escapeRegExp(extensionMatch[1])}(?:-\\d+x\\d+)?${escapeRegExp(extensionMatch[2])}` + : escapeRegExp(baseUrl); + + return new RegExp( + `${basePattern}(\\?[^"'\\s]*)?(?=$|["'\\s<>)\\],;:!?]|\\.(?=$|["'\\s<>)\\]]))`, + "g", + ); +} diff --git a/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts b/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts index 59fbd24da..5cba95e0c 100644 --- a/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts +++ b/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts @@ -22,9 +22,12 @@ import { normalizeMediaValue } from "#media/normalize.js"; import type { MediaProvider } from "#media/types.js"; import type { EmDashHandlers } from "#types"; -export const prerender = false; - -const REGEX_SPECIAL_CHARS = /[.*+?^${}()|[\]\\]/g; +import { + buildBaseUrlMap, + findMatchingUrl, + rewritePortableTextUrls, + rewriteStringUrls, +} from "./rewrite-url-helpers.js"; export interface RewriteUrlsResult { /** Total items updated */ @@ -37,6 +40,8 @@ export interface RewriteUrlsResult { errors: Array<{ collection: string; id: string; error: string }>; } +export const prerender = false; + export const POST: APIRoute = async ({ request, locals }) => { const { emdash, user } = locals; @@ -70,180 +75,6 @@ export const POST: APIRoute = async ({ request, locals }) => { } }; -/** - * Strip query parameters from a URL for base matching - */ -function getBaseUrl(url: string): string { - try { - const parsed = new URL(url); - return `${parsed.origin}${parsed.pathname}`; - } catch { - // If URL parsing fails, try simple string split - return url.split("?")[0] || url; - } -} - -/** - * Build a map of base URLs to new URLs for flexible matching - */ -function buildBaseUrlMap(urlMap: Record): Map { - const baseMap = new Map(); - for (const [oldUrl, newUrl] of Object.entries(urlMap)) { - const baseUrl = getBaseUrl(oldUrl); - baseMap.set(baseUrl, newUrl); - } - return baseMap; -} - -/** - * Find matching new URL for a given URL, checking both exact and base matches - */ -function findMatchingUrl( - url: string, - exactMap: Record, - baseMap: Map, -): string | null { - // Try exact match first - if (exactMap[url]) { - return exactMap[url]; - } - - // Try base URL match (ignoring query params) - const baseUrl = getBaseUrl(url); - const baseMatch = baseMap.get(baseUrl); - if (baseMatch) { - return baseMatch; - } - - return null; -} - -/** - * Portable Text block type (simplified for URL rewriting) - */ -interface PortableTextBlock { - _type: string; - _key?: string; - asset?: { - _type?: string; - _ref?: string; - url?: string; - }; - link?: string; - // For nested content like galleries - images?: PortableTextBlock[]; - columns?: Array<{ content?: PortableTextBlock[] }>; - [key: string]: unknown; -} - -/** - * Rewrite URLs in a Portable Text array, returning whether any changes were made - */ -function rewritePortableTextUrls( - blocks: PortableTextBlock[], - exactMap: Record, - baseMap: Map, -): { changed: boolean; urlsRewritten: number } { - let changed = false; - let urlsRewritten = 0; - - for (const block of blocks) { - // Handle image blocks - if (block._type === "image" && block.asset?.url) { - const newUrl = findMatchingUrl(block.asset.url, exactMap, baseMap); - if (newUrl) { - block.asset.url = newUrl; - block.asset._ref = newUrl; // Also update the reference - changed = true; - urlsRewritten++; - } - } - - // Handle image link URLs (for linked images) - if (block._type === "image" && block.link) { - const newUrl = findMatchingUrl(block.link, exactMap, baseMap); - if (newUrl) { - block.link = newUrl; - changed = true; - urlsRewritten++; - } - } - - // Handle gallery blocks with nested images - if (block._type === "gallery" && Array.isArray(block.images)) { - const result = rewritePortableTextUrls(block.images, exactMap, baseMap); - if (result.changed) { - changed = true; - urlsRewritten += result.urlsRewritten; - } - } - - // Handle columns blocks with nested content - if (block._type === "columns" && Array.isArray(block.columns)) { - for (const column of block.columns) { - if (Array.isArray(column.content)) { - const result = rewritePortableTextUrls(column.content, exactMap, baseMap); - if (result.changed) { - changed = true; - urlsRewritten += result.urlsRewritten; - } - } - } - } - } - - return { changed, urlsRewritten }; -} - -/** - * Rewrite URLs in a string field using simple string replacement - */ -function rewriteStringUrls( - value: string, - exactMap: Record, - baseMap: Map, -): { newValue: string; changed: boolean; urlsRewritten: number } { - let newValue = value; - let changed = false; - let urlsRewritten = 0; - - // Try exact matches first - for (const [oldUrl, newUrl] of Object.entries(exactMap)) { - if (newValue.includes(oldUrl)) { - newValue = newValue.split(oldUrl).join(newUrl); - changed = true; - urlsRewritten++; - } - } - - // For base URL matching in strings, we need to be more careful - // Only match if we find a URL that starts with the base - for (const [baseUrl, newUrl] of baseMap.entries()) { - // Look for the base URL followed by optional query string or end - const regex = new RegExp(escapeRegExp(baseUrl) + "(\\?[^\"'\\s]*)?", "g"); - const matches = newValue.match(regex); - if (matches) { - for (const match of matches) { - // Don't replace if we already have an exact match in the map - if (!exactMap[match]) { - newValue = newValue.split(match).join(newUrl); - changed = true; - urlsRewritten++; - } - } - } - } - - return { newValue, changed, urlsRewritten }; -} - -/** - * Escape special regex characters in a string - */ -function escapeRegExp(string: string): string { - return string.replace(REGEX_SPECIAL_CHARS, "\\$&"); -} - async function rewriteUrls( db: NonNullable, urlMap: Record, diff --git a/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts b/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts new file mode 100644 index 000000000..09b030812 --- /dev/null +++ b/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from "vitest"; + +import { + buildBaseUrlMap, + findMatchingUrl, + getBaseUrl, + rewritePortableTextUrls, + rewriteStringUrls, +} from "../../../src/astro/routes/api/import/wordpress/rewrite-url-helpers.js"; + +describe("WordPress import URL rewriting", () => { + const oldOriginalUrl = "https://example.com/wp-content/uploads/2026/01/hero.jpg"; + const oldVariantUrl = "https://example.com/wp-content/uploads/2026/01/hero-1024x695.jpg"; + const newUrl = "/_emdash/media/file/imported/hero.jpg"; + const urlMap = { [oldOriginalUrl]: newUrl }; + + it("strips query strings for base matching without changing filenames", () => { + expect(getBaseUrl(`${oldVariantUrl}?w=1024`)).toBe(oldVariantUrl); + }); + + it("matches Portable Text image asset URLs that use a WordPress size suffix", () => { + const baseMap = buildBaseUrlMap(urlMap); + const blocks = [ + { + _type: "image", + asset: { + _type: "reference", + _ref: oldVariantUrl, + url: oldVariantUrl, + }, + }, + ]; + + const result = rewritePortableTextUrls(blocks, urlMap, baseMap); + + expect(result).toEqual({ changed: true, urlsRewritten: 1 }); + expect(blocks[0]?.asset?.url).toBe(newUrl); + expect(blocks[0]?.asset?._ref).toBe(newUrl); + }); + + it("matches string URLs that use a WordPress size suffix", () => { + const baseMap = buildBaseUrlMap(urlMap); + const result = rewriteStringUrls( + `Hero`, + urlMap, + baseMap, + ); + + expect(result).toEqual({ + newValue: `Hero`, + changed: true, + urlsRewritten: 1, + }); + }); + + it("keeps exact matching for original attachment URLs", () => { + const baseMap = buildBaseUrlMap(urlMap); + + expect(findMatchingUrl(oldOriginalUrl, urlMap, baseMap)).toBe(newUrl); + }); + + it("preserves dimension-named original attachment URLs while matching their variants", () => { + const dimensionNamedOriginal = + "https://example.com/wp-content/uploads/2026/01/banner-300x250.jpg"; + const dimensionNamedVariant = + "https://example.com/wp-content/uploads/2026/01/banner-300x250-150x125.jpg"; + const importedUrl = "/_emdash/media/file/imported/banner-300x250.jpg"; + const exactMap = { [dimensionNamedOriginal]: importedUrl }; + const baseMap = buildBaseUrlMap(exactMap); + + expect(findMatchingUrl(dimensionNamedVariant, exactMap, baseMap)).toBe(importedUrl); + }); + + it("does not rewrite URL prefixes inside longer filenames", () => { + const baseMap = buildBaseUrlMap(urlMap); + const value = `Hero`; + + expect(rewriteStringUrls(value, urlMap, baseMap)).toEqual({ + newValue: value, + changed: false, + urlsRewritten: 0, + }); + }); + + it("rewrites bare variant URLs followed by prose punctuation", () => { + const baseMap = buildBaseUrlMap(urlMap); + + expect(rewriteStringUrls(`Image: ${oldVariantUrl}, next`, urlMap, baseMap)).toEqual({ + newValue: `Image: ${newUrl}, next`, + changed: true, + urlsRewritten: 1, + }); + + expect(rewriteStringUrls(`Image: ${oldVariantUrl}.`, urlMap, baseMap)).toEqual({ + newValue: `Image: ${newUrl}.`, + changed: true, + urlsRewritten: 1, + }); + }); +}); From 1f693d0527bdf3a8ec92f36b19ebf8f22b6cc17a Mon Sep 17 00:00:00 2001 From: Mason James Date: Mon, 4 May 2026 22:20:51 -0400 Subject: [PATCH 2/2] Address URL rewrite review comments --- .../api/import/wordpress/rewrite-url-helpers.ts | 2 +- .../astro/routes/api/import/wordpress/rewrite-urls.ts | 1 + .../tests/unit/import/wordpress-rewrite-urls.test.ts | 11 +++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts b/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts index a901ce253..06a139d76 100644 --- a/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts +++ b/packages/core/src/astro/routes/api/import/wordpress/rewrite-url-helpers.ts @@ -56,7 +56,7 @@ export function findMatchingUrl( /** * Portable Text block type (simplified for URL rewriting) */ -interface PortableTextBlock { +export interface PortableTextBlock { _type: string; _key?: string; asset?: { diff --git a/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts b/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts index 5cba95e0c..eabd7b0a2 100644 --- a/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts +++ b/packages/core/src/astro/routes/api/import/wordpress/rewrite-urls.ts @@ -28,6 +28,7 @@ import { rewritePortableTextUrls, rewriteStringUrls, } from "./rewrite-url-helpers.js"; +import type { PortableTextBlock } from "./rewrite-url-helpers.js"; export interface RewriteUrlsResult { /** Total items updated */ diff --git a/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts b/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts index 09b030812..0ffcbb303 100644 --- a/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts +++ b/packages/core/tests/unit/import/wordpress-rewrite-urls.test.ts @@ -53,6 +53,17 @@ describe("WordPress import URL rewriting", () => { }); }); + it("matches unquoted image URLs followed by a closing tag delimiter", () => { + const baseMap = buildBaseUrlMap(urlMap); + const result = rewriteStringUrls(``, urlMap, baseMap); + + expect(result).toEqual({ + newValue: ``, + changed: true, + urlsRewritten: 1, + }); + }); + it("keeps exact matching for original attachment URLs", () => { const baseMap = buildBaseUrlMap(urlMap);