diff --git a/.changeset/fix-taxonomy-term-counts.md b/.changeset/fix-taxonomy-term-counts.md new file mode 100644 index 000000000..12b02917e --- /dev/null +++ b/.changeset/fix-taxonomy-term-counts.md @@ -0,0 +1,5 @@ +--- +"emdash": patch +--- + +Fixes taxonomy term counts (e.g. the Categories sidebar widget) so they only include published, non-deleted entries. Previously drafts and soft-deleted entries inflated the counts. diff --git a/packages/core/src/api/handlers/content.ts b/packages/core/src/api/handlers/content.ts index 50f5dfa25..63b93f9c4 100644 --- a/packages/core/src/api/handlers/content.ts +++ b/packages/core/src/api/handlers/content.ts @@ -24,6 +24,7 @@ import type { Database } from "../../database/types.js"; import { validateIdentifier } from "../../database/validate.js"; import { isI18nEnabled } from "../../i18n/config.js"; import { invalidateRedirectCache } from "../../redirects/cache.js"; +import { invalidateTermCache } from "../../taxonomies/index.js"; import { isMissingTableError } from "../../utils/db-errors.js"; import { encodeRev, validateRev } from "../rev.js"; import type { ApiResult, ContentListResponse, ContentResponse } from "../types.js"; @@ -832,6 +833,9 @@ export async function handleContentDelete( }; } + // Trashed entries are excluded from public counts (#581). + invalidateTermCache(); + return { success: true, data: { deleted: true }, @@ -873,6 +877,9 @@ export async function handleContentRestore( }; } + // Restored entry may now appear in public counts again (#581). + invalidateTermCache(); + return { success: true, data: { restored: true }, @@ -932,6 +939,10 @@ export async function handleContentPermanentDelete( }; } + // Row is gone (and so are its content_taxonomies via FK), drop + // any cached counts that may have included it (#581). 
+ invalidateTermCache(); + return { success: true, data: { deleted: true }, @@ -1133,6 +1144,10 @@ export async function handleContentPublish( return repo.publish(collection, resolvedId, options.publishedAt); }); + // Drop the worker-lifetime term-counts cache so taxonomy widgets + // reflect the new published entry on the next read (#581). + invalidateTermCache(); + const hasSeo = await collectionHasSeo(db, collection); await hydrateSeo(db, collection, item, hasSeo); @@ -1179,6 +1194,10 @@ export async function handleContentUnpublish( return repo.unpublish(collection, resolvedId); }); + // Drop the worker-lifetime term-counts cache so taxonomy widgets + // stop counting the entry on the next read (#581). + invalidateTermCache(); + const hasSeo = await collectionHasSeo(db, collection); await hydrateSeo(db, collection, item, hasSeo); diff --git a/packages/core/src/taxonomies/index.ts b/packages/core/src/taxonomies/index.ts index 93cd7605c..5502d6d72 100644 --- a/packages/core/src/taxonomies/index.ts +++ b/packages/core/src/taxonomies/index.ts @@ -4,6 +4,10 @@ * Provides functions to query taxonomy definitions and terms. */ +import type { Kysely } from "kysely"; +import { sql } from "kysely"; + +import { validateIdentifier } from "../database/validate.js"; import { getDb } from "../loader.js"; import { peekRequestCache, requestCached, setRequestCacheEntry } from "../request-cache.js"; import { chunks, SQL_BATCH_SIZE } from "../utils/chunks.js"; @@ -11,16 +15,56 @@ import { isMissingTableError } from "../utils/db-errors.js"; import type { TaxonomyDef, TaxonomyTerm, TaxonomyTermRow } from "./types.js"; /** - * No-op — kept for API compatibility. + * Worker-lifetime cache for the per-taxonomy term-counts map computed by + * `countTermAssignmentsForTaxonomy`. Lives on `globalThis` so module + * duplication during SSR bundling can't fragment the cache (same pattern + * as `request-context.ts` and `secrets.ts`). 
+ * + * Counts change when: + * - an entry's visibility flips (publish, unpublish, soft delete, + * restore, permanent delete) + * - a term assignment is added, removed, or replaced (e.g. + * `setTermsForEntry`, `attachToEntry`, `detachFromEntry`, + * `clearEntryTerms`) + * - a taxonomy term itself is created, renamed, or deleted * - * Used to invalidate a worker-lifetime "has any term assignments?" probe. - * That probe added a query on every cold isolate to save one query on - * sites with zero term assignments (i.e. the wrong tradeoff), so we - * dropped it. The batch term join below returns an empty map for empty - * sites at the same cost as the probe, without the pre-check. + * Call `invalidateTermCache()` from any of those write paths to drop the + * cache. Any new write path that affects either side of the join must + * call it too. + */ +const TERM_COUNTS_CACHE_KEY = Symbol.for("@emdash-cms/core/term-counts-cache@1"); + +interface TermCountsHolder { + cache: WeakMap<object, Map<string, Promise<Map<string, number>>>>; +} + +function getTermCountsHolder(): TermCountsHolder { + // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- globalThis singleton pattern + const holder = globalThis as Record<symbol, TermCountsHolder | undefined>; + let entry = holder[TERM_COUNTS_CACHE_KEY]; + if (!entry) { + entry = { cache: new WeakMap() }; + holder[TERM_COUNTS_CACHE_KEY] = entry; + } + return entry; +} + +/** + * Invalidate the worker-lifetime term-counts cache. + * + * Called from any write path that can change either side of the count + * join — entry visibility (`content:publish`/`unpublish`, soft delete, + * restore, permanent delete) and term assignment changes + * (`setTermsForEntry`, `attachToEntry`, `detachFromEntry`, + * `clearEntryTerms`, term create/update/delete). The next read + * repopulates the cache lazily. */ export function invalidateTermCache(): void { - // Intentionally empty. 
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-type-assertion -- globalThis singleton pattern + const holder = globalThis as Record<symbol, TermCountsHolder | undefined>; + if (holder[TERM_COUNTS_CACHE_KEY]) { + holder[TERM_COUNTS_CACHE_KEY] = { cache: new WeakMap() }; + } } /** @@ -98,18 +142,11 @@ export async function getTaxonomyTerms(taxonomyName: string): Promise eb.fn.count("entry_id").as("count")) - .groupBy("taxonomy_id") - .execute(); - - const counts = new Map(); - for (const row of countsResult) { - counts.set(row.taxonomy_id, row.count); - } + // Count entries for each term — only count published, non-deleted + // entries. Without joining the content table, drafts and soft- + // deleted rows inflated category/tag counts in widgets (#581). + // Worker-lifetime cached: writes call `invalidateTermCache()`. + const counts = await getCachedTermCounts(db, taxonomyName, def.collections); const flatTerms: TaxonomyTermRow[] = rows.map((row) => ({ id: row.id, @@ -151,14 +188,20 @@ export async function getTerm(taxonomyName: string, slug: string): Promise eb.fn.count("entry_id").as("count")) - .where("taxonomy_id", "=", row.id) - .executeTakeFirst(); - - const count = countResult?.count ?? 0; + // Get entry count — restricted to published, non-deleted entries (#581). + // Deliberately reads the full per-taxonomy counts map from the same + // worker-lifetime cache as `getTaxonomyTerms` rather than running a + // narrower single-term query: a category/tag page typically renders + // the widget that calls `getTaxonomyTerms` alongside `getTerm`, so + // the shared cache turns the second call into a Map lookup with no + // extra round-trip. A separate `WHERE taxonomy_id = ?` path would + // either bypass the cache (extra query on every render) or fragment + // it (separate cache keys for full vs. single-term reads). + const def = await getTaxonomyDef(taxonomyName); + const counts = def + ? 
await getCachedTermCounts(db, taxonomyName, def.collections) + : new Map<string, number>(); + const count = counts.get(row.id) ?? 0; // Get children if hierarchical const childRows = await db @@ -485,6 +528,127 @@ export async function getEntriesByTerm( return entries; } +/** + * Count `content_taxonomies` rows whose entry is currently published + * and not soft-deleted, grouped by taxonomy_id, scoped to a single + * taxonomy across every collection it applies to. + * + * The taxonomy → collection mapping comes from + * `_emdash_taxonomy_defs.collections`; assignments to collections not + * declared there are excluded (treated as drift). Each per-collection + * subquery joins `taxonomies` and filters by `t.name = ${taxonomyName}` + * so we don't aggregate counts for unrelated taxonomies that happen to + * live in the same collection. + * + * Cold path: one `UNION ALL` round-trip combines per-collection joins + * so a `tags` taxonomy spanning `posts` + `products` still costs one + * query, not N. Pre-migration databases (or drift where a referenced + * `ec_*` table is gone) fall back to a per-collection loop with + * `isMissingTableError` recovery. + */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any -- dialect-agnostic +async function countTermAssignmentsForTaxonomy( + db: Kysely<any>, + taxonomyName: string, + collections: string[], +): Promise<Map<string, number>> { + const counts = new Map<string, number>(); + + // Dedupe: `_emdash_taxonomy_defs.collections` is JSON, so a seed or + // migration could yield duplicate entries. Without dedupe the + // `UNION ALL` would count the same collection twice. + const safeCollections = [...new Set(collections)].filter((collection) => { + try { + validateIdentifier(collection, "collection"); + return true; + } catch { + // Defense-in-depth: skip identifiers that wouldn't be safe to + // interpolate even though `_emdash_collections` validates on insert. 
+ return false; + } + }); + + if (safeCollections.length === 0) return counts; + + const buildPart = (collection: string) => sql` + SELECT ct.taxonomy_id, COUNT(*) AS count + FROM content_taxonomies ct + INNER JOIN taxonomies t + ON t.id = ct.taxonomy_id + INNER JOIN ${sql.ref(`ec_${collection}`)} e + ON e.id = ct.entry_id + WHERE ct.collection = ${collection} + AND t.name = ${taxonomyName} + AND e.status = 'published' + AND e.deleted_at IS NULL + GROUP BY ct.taxonomy_id + `; + + try { + const unionParts = safeCollections.map(buildPart); + const result = await sql<{ taxonomy_id: string; count: number | string }>` + SELECT taxonomy_id, SUM(count) AS count + FROM (${sql.join(unionParts, sql` UNION ALL `)}) AS sub + GROUP BY taxonomy_id + `.execute(db); + for (const row of result.rows) { + counts.set(row.taxonomy_id, Number(row.count)); + } + return counts; + } catch (error) { + if (!isMissingTableError(error)) throw error; + // One or more `ec_{collection}` tables are missing (pre-migration + // install or drift). Fall back to per-collection so we can skip the + // missing ones without losing data from the rest. + } + + for (const collection of safeCollections) { + try { + const result = await sql<{ taxonomy_id: string; count: number | string }>` + ${buildPart(collection)} + `.execute(db); + for (const row of result.rows) { + const prev = counts.get(row.taxonomy_id) ?? 0; + counts.set(row.taxonomy_id, prev + Number(row.count)); + } + } catch (error) { + if (isMissingTableError(error)) continue; + throw error; + } + } + return counts; +} + +/** + * Worker-lifetime cached wrapper around `countTermAssignmentsForTaxonomy`. + * + * Keyed by `(db, taxonomyName)` so playground / per-DO / per-test databases + * each maintain their own counts. Caches the in-flight promise so concurrent + * callers share one query; on error the cache entry is dropped so the next + * caller retries. 
+ */ +// eslint-disable-next-line @typescript-eslint/no-explicit-any -- dialect-agnostic +function getCachedTermCounts( + db: Kysely<any>, + taxonomyName: string, + collections: string[], +): Promise<Map<string, number>> { + const { cache } = getTermCountsHolder(); + let perDb = cache.get(db); + if (!perDb) { + perDb = new Map(); + cache.set(db, perDb); + } + const cached = perDb.get(taxonomyName); + if (cached) return cached; + const promise = countTermAssignmentsForTaxonomy(db, taxonomyName, collections).catch((error) => { + perDb.delete(taxonomyName); + throw error; + }); + perDb.set(taxonomyName, promise); + return promise; +} + /** * Build tree structure from flat terms */ diff --git a/packages/core/tests/unit/taxonomies/term-counts.test.ts b/packages/core/tests/unit/taxonomies/term-counts.test.ts new file mode 100644 index 000000000..56f583c16 --- /dev/null +++ b/packages/core/tests/unit/taxonomies/term-counts.test.ts @@ -0,0 +1,194 @@ +import type { Kysely } from "kysely"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +import { ContentRepository } from "../../../src/database/repositories/content.js"; +import { TaxonomyRepository } from "../../../src/database/repositories/taxonomy.js"; +import type { Database } from "../../../src/database/types.js"; +import { setupTestDatabaseWithCollections, teardownTestDatabase } from "../../utils/test-db.js"; + +vi.mock("../../../src/loader.js", () => ({ + getDb: vi.fn(), +})); + +import { getDb } from "../../../src/loader.js"; +import { getTaxonomyTerms, getTerm, invalidateTermCache } from "../../../src/taxonomies/index.js"; + +describe("taxonomy term counts (#581)", () => { + let db: Kysely<Database>; + let taxRepo: TaxonomyRepository; + let contentRepo: ContentRepository; + + beforeEach(async () => { + db = await setupTestDatabaseWithCollections(); + taxRepo = new TaxonomyRepository(db); + contentRepo = new ContentRepository(db); + vi.mocked(getDb).mockResolvedValue(db); + invalidateTermCache(); + + // The 
setupTestDatabaseWithCollections helper creates a singular + // `post` collection while the seeded `category` taxonomy points at + // `posts` (plural). Re-target it so attachments to the test + // collection are counted. + await db + .updateTable("_emdash_taxonomy_defs") + .set({ collections: JSON.stringify(["post"]) }) + .where("name", "=", "category") + .execute(); + }); + + afterEach(async () => { + invalidateTermCache(); + await teardownTestDatabase(db); + vi.restoreAllMocks(); + }); + + it("excludes draft entries from getTaxonomyTerms counts", async () => { + const term = await taxRepo.create({ name: "category", slug: "tech", label: "Tech" }); + + const published = await contentRepo.create({ + type: "post", + slug: "p1", + data: { title: "P1" }, + status: "published", + }); + const draft = await contentRepo.create({ + type: "post", + slug: "p2", + data: { title: "P2" }, + status: "draft", + }); + + await taxRepo.attachToEntry("post", published.id, term.id); + await taxRepo.attachToEntry("post", draft.id, term.id); + + const terms = await getTaxonomyTerms("category"); + const tech = terms.find((t) => t.slug === "tech"); + expect(tech?.count).toBe(1); + }); + + it("excludes draft entries from getTerm count", async () => { + const term = await taxRepo.create({ name: "category", slug: "tech", label: "Tech" }); + + const published = await contentRepo.create({ + type: "post", + slug: "p1", + data: { title: "P1" }, + status: "published", + }); + const draft = await contentRepo.create({ + type: "post", + slug: "p2", + data: { title: "P2" }, + status: "draft", + }); + + await taxRepo.attachToEntry("post", published.id, term.id); + await taxRepo.attachToEntry("post", draft.id, term.id); + + const single = await getTerm("category", "tech"); + expect(single?.count).toBe(1); + }); + + it("excludes soft-deleted entries from term counts", async () => { + const term = await taxRepo.create({ name: "category", slug: "news", label: "News" }); + + const live = await 
contentRepo.create({ + type: "post", + slug: "p1", + data: { title: "P1" }, + status: "published", + }); + const trashed = await contentRepo.create({ + type: "post", + slug: "p2", + data: { title: "P2" }, + status: "published", + }); + + await taxRepo.attachToEntry("post", live.id, term.id); + await taxRepo.attachToEntry("post", trashed.id, term.id); + + await contentRepo.delete("post", trashed.id); + + const terms = await getTaxonomyTerms("category"); + expect(terms.find((t) => t.slug === "news")?.count).toBe(1); + }); + + it("returns zero count when only drafts are attached", async () => { + const term = await taxRepo.create({ name: "category", slug: "empty", label: "Empty" }); + const draft = await contentRepo.create({ + type: "post", + slug: "d", + data: { title: "D" }, + status: "draft", + }); + await taxRepo.attachToEntry("post", draft.id, term.id); + + const terms = await getTaxonomyTerms("category"); + expect(terms.find((t) => t.slug === "empty")?.count).toBe(0); + }); + + it("worker-lifetime cache serves stale counts until invalidateTermCache", async () => { + const term = await taxRepo.create({ name: "category", slug: "tech", label: "Tech" }); + const first = await contentRepo.create({ + type: "post", + slug: "p1", + data: { title: "P1" }, + status: "published", + }); + await taxRepo.attachToEntry("post", first.id, term.id); + + // First read populates the cache. + const initial = await getTaxonomyTerms("category"); + expect(initial.find((t) => t.slug === "tech")?.count).toBe(1); + + // Add another published assignment without invalidating — the + // cache should still serve the prior count. + const second = await contentRepo.create({ + type: "post", + slug: "p2", + data: { title: "P2" }, + status: "published", + }); + await taxRepo.attachToEntry("post", second.id, term.id); + + const stale = await getTaxonomyTerms("category"); + expect(stale.find((t) => t.slug === "tech")?.count).toBe(1); + + // After invalidation the next read recomputes. 
+ invalidateTermCache(); + const fresh = await getTaxonomyTerms("category"); + expect(fresh.find((t) => t.slug === "tech")?.count).toBe(2); + }); + + it("aggregates counts across every collection a taxonomy applies to", async () => { + // Re-target `category` to span both `post` and `page` (both tables + // exist via setupTestDatabaseWithCollections). + await db + .updateTable("_emdash_taxonomy_defs") + .set({ collections: JSON.stringify(["post", "page"]) }) + .where("name", "=", "category") + .execute(); + + const term = await taxRepo.create({ name: "category", slug: "shared", label: "Shared" }); + + const post = await contentRepo.create({ + type: "post", + slug: "p", + data: { title: "P" }, + status: "published", + }); + const page = await contentRepo.create({ + type: "page", + slug: "g", + data: { title: "G" }, + status: "published", + }); + await taxRepo.attachToEntry("post", post.id, term.id); + await taxRepo.attachToEntry("page", page.id, term.id); + + invalidateTermCache(); + const terms = await getTaxonomyTerms("category"); + expect(terms.find((t) => t.slug === "shared")?.count).toBe(2); + }); +}); diff --git a/scripts/query-counts.snapshot.d1.json b/scripts/query-counts.snapshot.d1.json index 621ce0f79..45b18238b 100644 --- a/scripts/query-counts.snapshot.d1.json +++ b/scripts/query-counts.snapshot.d1.json @@ -1,8 +1,8 @@ { "GET / (cold)": 21, "GET / (warm)": 11, - "GET /category/development (cold)": 25, - "GET /category/development (warm)": 14, + "GET /category/development (cold)": 26, + "GET /category/development (warm)": 15, "GET /pages/about (cold)": 20, "GET /pages/about (warm)": 10, "GET /posts (cold)": 21, @@ -13,6 +13,6 @@ "GET /rss.xml (warm)": 4, "GET /search (cold)": 22, "GET /search (warm)": 12, - "GET /tag/webdev (cold)": 25, - "GET /tag/webdev (warm)": 14 + "GET /tag/webdev (cold)": 26, + "GET /tag/webdev (warm)": 15 } diff --git a/scripts/query-counts.snapshot.sqlite.json b/scripts/query-counts.snapshot.sqlite.json index b1e8dd4b0..2d3ba4b49 
100644 --- a/scripts/query-counts.snapshot.sqlite.json +++ b/scripts/query-counts.snapshot.sqlite.json @@ -1,18 +1,18 @@ { "GET / (cold)": 11, "GET / (warm)": 11, - "GET /category/development (cold)": 15, + "GET /category/development (cold)": 16, "GET /category/development (warm)": 14, "GET /pages/about (cold)": 10, "GET /pages/about (warm)": 10, "GET /posts (cold)": 11, "GET /posts (warm)": 11, "GET /posts/building-for-the-long-term (cold)": 23, - "GET /posts/building-for-the-long-term (warm)": 23, + "GET /posts/building-for-the-long-term (warm)": 21, "GET /rss.xml (cold)": 4, "GET /rss.xml (warm)": 4, "GET /search (cold)": 12, "GET /search (warm)": 12, - "GET /tag/webdev (cold)": 14, + "GET /tag/webdev (cold)": 15, "GET /tag/webdev (warm)": 14 }