Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-taxonomy-term-counts.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"emdash": patch
---

Fixes taxonomy term counts (e.g. the Categories sidebar widget) so they only include published, non-deleted entries. Previously drafts and soft-deleted entries inflated the counts.
19 changes: 19 additions & 0 deletions packages/core/src/api/handlers/content.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import type { Database } from "../../database/types.js";
import { validateIdentifier } from "../../database/validate.js";
import { isI18nEnabled } from "../../i18n/config.js";
import { invalidateRedirectCache } from "../../redirects/cache.js";
import { invalidateTermCache } from "../../taxonomies/index.js";
import { isMissingTableError } from "../../utils/db-errors.js";
import { encodeRev, validateRev } from "../rev.js";
import type { ApiResult, ContentListResponse, ContentResponse } from "../types.js";
Expand Down Expand Up @@ -832,6 +833,9 @@ export async function handleContentDelete(
};
}

// Trashed entries are excluded from public counts (#581).
invalidateTermCache();

return {
success: true,
data: { deleted: true },
Expand Down Expand Up @@ -873,6 +877,9 @@ export async function handleContentRestore(
};
}

// Restored entry may now appear in public counts again (#581).
invalidateTermCache();

return {
success: true,
data: { restored: true },
Expand Down Expand Up @@ -932,6 +939,10 @@ export async function handleContentPermanentDelete(
};
}

// Row is gone (and so are its content_taxonomies via FK), drop
// any cached counts that may have included it (#581).
invalidateTermCache();

return {
success: true,
data: { deleted: true },
Expand Down Expand Up @@ -1133,6 +1144,10 @@ export async function handleContentPublish(
return repo.publish(collection, resolvedId, options.publishedAt);
});

// Drop the worker-lifetime term-counts cache so taxonomy widgets
// reflect the new published entry on the next read (#581).
invalidateTermCache();

const hasSeo = await collectionHasSeo(db, collection);
await hydrateSeo(db, collection, item, hasSeo);

Expand Down Expand Up @@ -1179,6 +1194,10 @@ export async function handleContentUnpublish(
return repo.unpublish(collection, resolvedId);
});

// Drop the worker-lifetime term-counts cache so taxonomy widgets
// stop counting the entry on the next read (#581).
invalidateTermCache();

const hasSeo = await collectionHasSeo(db, collection);
await hydrateSeo(db, collection, item, hasSeo);

Expand Down
218 changes: 191 additions & 27 deletions packages/core/src/taxonomies/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,67 @@
* Provides functions to query taxonomy definitions and terms.
*/

import type { Kysely } from "kysely";
import { sql } from "kysely";

import { validateIdentifier } from "../database/validate.js";
import { getDb } from "../loader.js";
import { peekRequestCache, requestCached, setRequestCacheEntry } from "../request-cache.js";
import { chunks, SQL_BATCH_SIZE } from "../utils/chunks.js";
import { isMissingTableError } from "../utils/db-errors.js";
import type { TaxonomyDef, TaxonomyTerm, TaxonomyTermRow } from "./types.js";

/**
* No-op — kept for API compatibility.
* Worker-lifetime cache for the per-taxonomy term-counts map computed by
* `countTermAssignmentsForTaxonomy`. Lives on `globalThis` so module
* duplication during SSR bundling can't fragment the cache (same pattern
* as `request-context.ts` and `secrets.ts`).
*
* Counts change when:
* - an entry's visibility flips (publish, unpublish, soft delete,
* restore, permanent delete)
* - a term assignment is added, removed, or replaced (e.g.
* `setTermsForEntry`, `attachToEntry`, `detachFromEntry`,
* `clearEntryTerms`)
* - a taxonomy term itself is created, renamed, or deleted
*
* Used to invalidate a worker-lifetime "has any term assignments?" probe.
* That probe added a query on every cold isolate to save one query on
* sites with zero term assignments (i.e. the wrong tradeoff), so we
* dropped it. The batch term join below returns an empty map for empty
* sites at the same cost as the probe, without the pre-check.
* Call `invalidateTermCache()` from any of those write paths to drop the
* cache. Any new write path that affects either side of the join must
* call it too.
*/
const TERM_COUNTS_CACHE_KEY = Symbol.for("@emdash-cms/core/term-counts-cache@1");

interface TermCountsHolder {
cache: WeakMap<object, Map<string, Promise<Map<string, number>>>>;
}

function getTermCountsHolder(): TermCountsHolder {
// eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- globalThis singleton pattern
const holder = globalThis as Record<symbol, TermCountsHolder | undefined>;
let entry = holder[TERM_COUNTS_CACHE_KEY];
if (!entry) {
entry = { cache: new WeakMap() };
holder[TERM_COUNTS_CACHE_KEY] = entry;
}
return entry;
}

/**
* Invalidate the worker-lifetime term-counts cache.
*
* Called from any write path that can change either side of the count
* join — entry visibility (`content:publish`/`unpublish`, soft delete,
* restore, permanent delete) and term assignment changes
* (`setTermsForEntry`, `attachToEntry`, `detachFromEntry`,
* `clearEntryTerms`, term create/update/delete). The next read
* repopulates the cache lazily.
*/
export function invalidateTermCache(): void {
// Intentionally empty.
// eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- globalThis singleton pattern
const holder = globalThis as Record<symbol, TermCountsHolder | undefined>;
if (holder[TERM_COUNTS_CACHE_KEY]) {
holder[TERM_COUNTS_CACHE_KEY] = { cache: new WeakMap() };
}
}

/**
Expand Down Expand Up @@ -98,18 +142,11 @@ export async function getTaxonomyTerms(taxonomyName: string): Promise<TaxonomyTe
.orderBy("label", "asc")
.execute();

// Count entries for each term
const countsResult = await db
.selectFrom("content_taxonomies")
.select(["taxonomy_id"])
.select((eb) => eb.fn.count<number>("entry_id").as("count"))
.groupBy("taxonomy_id")
.execute();

const counts = new Map<string, number>();
for (const row of countsResult) {
counts.set(row.taxonomy_id, row.count);
}
// Count entries for each term — only count published, non-deleted
// entries. Without joining the content table, drafts and soft-
// deleted rows inflated category/tag counts in widgets (#581).
// Worker-lifetime cached: writes call `invalidateTermCache()`.
const counts = await getCachedTermCounts(db, taxonomyName, def.collections);

const flatTerms: TaxonomyTermRow[] = rows.map((row) => ({
id: row.id,
Expand Down Expand Up @@ -151,14 +188,20 @@ export async function getTerm(taxonomyName: string, slug: string): Promise<Taxon

if (!row) return null;

// Get entry count
const countResult = await db
.selectFrom("content_taxonomies")
.select((eb) => eb.fn.count<number>("entry_id").as("count"))
.where("taxonomy_id", "=", row.id)
.executeTakeFirst();

const count = countResult?.count ?? 0;
// Get entry count — restricted to published, non-deleted entries (#581).
// Deliberately reads the full per-taxonomy counts map from the same
// worker-lifetime cache as `getTaxonomyTerms` rather than running a
// narrower single-term query: a category/tag page typically renders
// the widget that calls `getTaxonomyTerms` alongside `getTerm`, so
// the shared cache turns the second call into a Map lookup with no
// extra round-trip. A separate `WHERE taxonomy_id = ?` path would
// either bypass the cache (extra query on every render) or fragment
// it (separate cache keys for full vs. single-term reads).
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor: PR description mentions a termIds option on a shared countPublishedTermAssignments helper so the single-term path doesn't scan the whole taxonomy. The shipped code went with a different (better) design — share the full per-taxonomy counts map between getTaxonomyTerms and getTerm via the cache, as the comment here explains. Worth updating the PR body to match what's actually implemented so reviewers don't go looking for the option.

const def = await getTaxonomyDef(taxonomyName);
const counts = def
? await getCachedTermCounts(db, taxonomyName, def.collections)
: new Map<string, number>();
const count = counts.get(row.id) ?? 0;
Comment thread
r2sake marked this conversation as resolved.

// Get children if hierarchical
const childRows = await db
Expand Down Expand Up @@ -485,6 +528,127 @@ export async function getEntriesByTerm(
return entries;
}

/**
* Count `content_taxonomies` rows whose entry is currently published
* and not soft-deleted, grouped by taxonomy_id, scoped to a single
* taxonomy across every collection it applies to.
*
* The taxonomy → collection mapping comes from
* `_emdash_taxonomy_defs.collections`; assignments to collections not
* declared there are excluded (treated as drift). Each per-collection
* subquery joins `taxonomies` and filters by `t.name = ${taxonomyName}`
* so we don't aggregate counts for unrelated taxonomies that happen to
* live in the same collection.
*
* Cold path: one `UNION ALL` round-trip combines per-collection joins
* so a `tags` taxonomy spanning `posts` + `products` still costs one
* query, not N. Pre-migration databases (or drift where a referenced
* `ec_*` table is gone) fall back to a per-collection loop with
* `isMissingTableError` recovery.
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- dialect-agnostic
async function countTermAssignmentsForTaxonomy(
db: Kysely<any>,
taxonomyName: string,
collections: string[],
): Promise<Map<string, number>> {
const counts = new Map<string, number>();

// Dedupe: `_emdash_taxonomy_defs.collections` is JSON, so a seed or
// migration could yield duplicate entries. Without dedupe the
// `UNION ALL` would count the same collection twice.
const safeCollections = [...new Set(collections)].filter((collection) => {
try {
validateIdentifier(collection, "collection");
return true;
} catch {
// Defense-in-depth: skip identifiers that wouldn't be safe to
// interpolate even though `_emdash_collections` validates on insert.
return false;
}
});

if (safeCollections.length === 0) return counts;

const buildPart = (collection: string) => sql`
SELECT ct.taxonomy_id, COUNT(*) AS count
FROM content_taxonomies ct
INNER JOIN taxonomies t
ON t.id = ct.taxonomy_id
INNER JOIN ${sql.ref(`ec_${collection}`)} e
ON e.id = ct.entry_id
WHERE ct.collection = ${collection}
AND t.name = ${taxonomyName}
AND e.status = 'published'
AND e.deleted_at IS NULL
GROUP BY ct.taxonomy_id
`;

try {
const unionParts = safeCollections.map(buildPart);
const result = await sql<{ taxonomy_id: string; count: number | string }>`
SELECT taxonomy_id, SUM(count) AS count
FROM (${sql.join(unionParts, sql` UNION ALL `)}) AS sub
GROUP BY taxonomy_id
`.execute(db);
for (const row of result.rows) {
counts.set(row.taxonomy_id, Number(row.count));
}
return counts;
} catch (error) {
if (!isMissingTableError(error)) throw error;
// One or more `ec_{collection}` tables are missing (pre-migration
// install or drift). Fall back to per-collection so we can skip the
// missing ones without losing data from the rest.
}

for (const collection of safeCollections) {
try {
const result = await sql<{ taxonomy_id: string; count: number | string }>`
${buildPart(collection)}
`.execute(db);
for (const row of result.rows) {
const prev = counts.get(row.taxonomy_id) ?? 0;
counts.set(row.taxonomy_id, prev + Number(row.count));
}
} catch (error) {
if (isMissingTableError(error)) continue;
throw error;
}
}
return counts;
}

/**
* Worker-lifetime cached wrapper around `countTermAssignmentsForTaxonomy`.
*
* Keyed by `(db, taxonomyName)` so playground / per-DO / per-test databases
* each maintain their own counts. Caches the in-flight promise so concurrent
* callers share one query; on error the cache entry is dropped so the next
* caller retries.
*/
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- dialect-agnostic
function getCachedTermCounts(
db: Kysely<any>,
taxonomyName: string,
collections: string[],
): Promise<Map<string, number>> {
const { cache } = getTermCountsHolder();
let perDb = cache.get(db);
if (!perDb) {
perDb = new Map();
cache.set(db, perDb);
}
const cached = perDb.get(taxonomyName);
if (cached) return cached;
const promise = countTermAssignmentsForTaxonomy(db, taxonomyName, collections).catch((error) => {
perDb.delete(taxonomyName);
throw error;
});
perDb.set(taxonomyName, promise);
return promise;
}
Comment on lines +631 to +650
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cache misses on every D1 request. The WeakMap<Kysely, ...> layer means the cache is keyed by the per-request session-bound Kysely that createRequestScopedDb (in @emdash-cms/cloudflare/db/d1.ts:165) creates anew on every request. As a result, the second request to /category/x gets a different db reference than the first and reruns the count query — which is exactly what the query-count snapshot diff shows on this PR (D1 warm +1 for /category/development and /tag/webdev, while SQLite warm — using the module singleton — stays flat).

The secrets.ts precedent has the same shape but is only on edit/preview paths, so the regression there is invisible. Public taxonomy widget renders are the hot path that ascorbic's request was specifically targeting.

Simplest fix: drop the per-db indirection. The globalThis holder + Symbol.for keying is enough for SSR-bundle module-duplication safety, and invalidateTermCache() already swaps the holder wholesale on writes. Tests can keep working because they call invalidateTermCache() between cases (and could also be tightened to clear in beforeEach if cross-test isolation matters).

Suggested change
function getCachedTermCounts(
db: Kysely<any>,
taxonomyName: string,
collections: string[],
): Promise<Map<string, number>> {
const { cache } = getTermCountsHolder();
let perDb = cache.get(db);
if (!perDb) {
perDb = new Map();
cache.set(db, perDb);
}
const cached = perDb.get(taxonomyName);
if (cached) return cached;
const promise = countTermAssignmentsForTaxonomy(db, taxonomyName, collections).catch((error) => {
perDb.delete(taxonomyName);
throw error;
});
perDb.set(taxonomyName, promise);
return promise;
}
interface TermCountsHolder {
byTaxonomy: Map<string, Promise<Map<string, number>>>;
}
function getTermCountsHolder(): TermCountsHolder {
// eslint-disable-next-line typescript-eslint(no-unsafe-type-assertion) -- globalThis singleton pattern
const holder = globalThis as Record<symbol, TermCountsHolder | undefined>;
let entry = holder[TERM_COUNTS_CACHE_KEY];
if (!entry) {
entry = { byTaxonomy: new Map() };
holder[TERM_COUNTS_CACHE_KEY] = entry;
}
return entry;
}

And correspondingly in getCachedTermCounts:

function getCachedTermCounts(
	db: Kysely<any>,
	taxonomyName: string,
	collections: string[],
): Promise<Map<string, number>> {
	const { byTaxonomy } = getTermCountsHolder();
	const cached = byTaxonomy.get(taxonomyName);
	if (cached) return cached;
	const promise = countTermAssignmentsForTaxonomy(db, taxonomyName, collections).catch((error) => {
		byTaxonomy.delete(taxonomyName);
		throw error;
	});
	byTaxonomy.set(taxonomyName, promise);
	return promise;
}

The db only needs to be passed for the actual query execution, not for cache scoping — the cache lives at the worker level, which is logically per-deployment-per-database. (If you do want per-DB scoping for playground / DO previews, key by something stable like dbConfig.entrypoint rather than the Kysely instance — but in practice playgrounds run in their own isolates anyway, so the globalThis already provides isolation.)

With this change the warm path on D1 should drop to 0 queries for the term-counts read, matching the SQLite warm baseline.


/**
* Build tree structure from flat terms
*/
Expand Down
Loading
Loading