From e7c57f2fd1419d4a6edb51d3a3249a132d2eb781 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Thu, 9 Apr 2026 13:51:21 +0000 Subject: [PATCH] fix: canonicalize backend repo keys Prevent mixed-case owner/repo requests from splitting queue, live-status, and repo_reports identities. Add targeted regressions and a migration that deduplicates legacy case-only repo_report rows. --- .../0004_repo_report_canonical_full_names.sql | 39 +++++ src/server/live-status.ts | 8 +- src/server/queue.ts | 71 +++++++- src/server/repo-key.ts | 39 +++++ src/server/reports.ts | 42 +++-- test/queue-case-normalization.test.ts | 161 ++++++++++++++++++ test/reports-case-normalization.test.ts | 49 ++++++ test/repository-service-page-view.test.ts | 96 ++++++++--- .../api/repo/[owner]/[repo]/status/route.ts | 11 +- web/src/lib/repository-service.ts | 90 +++++----- web/src/lib/server/live-status.ts | 34 +++- web/src/lib/server/queue.ts | 36 ++-- web/src/lib/server/repo-key.ts | 39 +++++ web/src/lib/server/reports.ts | 22 ++- 14 files changed, 618 insertions(+), 119 deletions(-) create mode 100644 migrations/0004_repo_report_canonical_full_names.sql create mode 100644 src/server/repo-key.ts create mode 100644 test/queue-case-normalization.test.ts create mode 100644 test/reports-case-normalization.test.ts create mode 100644 web/src/lib/server/repo-key.ts diff --git a/migrations/0004_repo_report_canonical_full_names.sql b/migrations/0004_repo_report_canonical_full_names.sql new file mode 100644 index 0000000..a5f5f81 --- /dev/null +++ b/migrations/0004_repo_report_canonical_full_names.sql @@ -0,0 +1,39 @@ +WITH ranked_repo_reports AS ( + SELECT + ctid, + row_number() OVER ( + PARTITION BY lower(full_name) + ORDER BY + CASE + WHEN status = 'ready' AND report_json IS NOT NULL THEN 3 + WHEN status = 'processing' THEN 2 + WHEN status = 'queued' THEN 1 + ELSE 0 + END DESC, + updated_at DESC, + created_at DESC, + full_name ASC + ) AS duplicate_rank + FROM repo_reports +) +DELETE FROM repo_reports +WHERE ctid IN ( + SELECT ctid + FROM ranked_repo_reports + WHERE duplicate_rank > 1 +); + +UPDATE repo_reports +SET + full_name = lower(full_name), + owner = lower(owner), + repo = lower(repo), + github_url = 'https://github.com/' || lower(full_name) +WHERE + full_name <> lower(full_name) + OR owner <> lower(owner) + OR repo <> lower(repo) + OR github_url <> 'https://github.com/' || lower(full_name); + +CREATE UNIQUE INDEX IF NOT EXISTS repo_reports_full_name_lower_idx + ON repo_reports ((lower(full_name))); diff --git a/src/server/live-status.ts b/src/server/live-status.ts index 66daf90..7c72436 100644 --- a/src/server/live-status.ts +++ b/src/server/live-status.ts @@ -1,5 +1,6 @@ import type { Logger } from "../core/logger.ts" import { REPO_PROGRESS_PREFIX, REPO_PROGRESS_TTL_SECONDS } from "./constants.ts" +import { canonicalizeRepoFullName } from "./repo-key.ts" import { getRedisClient } from "./queue.ts" export type RepoLiveStatusPayload = { @@ -12,7 +13,7 @@ export type RepoLiveStatusPayload = { } function progressKey(fullName: string): string { - return `${REPO_PROGRESS_PREFIX}${fullName}` + return `${REPO_PROGRESS_PREFIX}${canonicalizeRepoFullName(fullName)}` } export async function writeRepoLiveStatus( @@ -21,16 +22,17 @@ export async function writeRepoLiveStatus( logger?: Logger, ): Promise { const redis = await getRedisClient() + const canonicalFullName = canonicalizeRepoFullName(fullName) const value: RepoLiveStatusPayload = { ...payload, updatedAt: new Date().toISOString(), } - await redis.set(progressKey(fullName), JSON.stringify(value), { + await redis.set(progressKey(canonicalFullName), JSON.stringify(value), { EX: REPO_PROGRESS_TTL_SECONDS, }) await logger?.debug("repo_live_status:write", { - repository: fullName, + repository: canonicalFullName, ...value, }) } diff --git a/src/server/queue.ts b/src/server/queue.ts index 86d0304..31e1c06 100644 --- a/src/server/queue.ts +++ b/src/server/queue.ts @@ -1,6 +1,7 @@ import { createClient, type RedisClientType } from "redis" import { REPO_PROCESSING_QUEUE_KEY, REPO_QUEUE_DEDUPE_PREFIX, REPO_QUEUE_DEDUPE_TTL_SECONDS, REPO_QUEUE_KEY } from "./constants.ts" +import { canonicalizeRepoFullName, repoIdentifierAliases } from "./repo-key.ts" let client: RedisClientType | null = null @@ -31,12 +32,41 @@ export async function getRedisClient(): Promise { } export function queueDedupeKey(fullName: string): string { - return `${REPO_QUEUE_DEDUPE_PREFIX}${fullName}` + return `${REPO_QUEUE_DEDUPE_PREFIX}${canonicalizeRepoFullName(fullName)}` +} + +async function findStoredRepoEntries(redis: RedisClientType, fullName: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) + const [queued, processing] = await Promise.all([ + redis.lRange(REPO_QUEUE_KEY, 0, -1), + redis.lRange(REPO_PROCESSING_QUEUE_KEY, 0, -1), + ]) + + return Array.from( + new Set([ + ...queued.filter((entry) => canonicalizeRepoFullName(entry) === canonicalFullName), + ...processing.filter((entry) => canonicalizeRepoFullName(entry) === canonicalFullName), + ]), + ) +} + +async function loadStoredRepoAliases(redis: RedisClientType, fullName: string): Promise { + return Array.from(new Set([...repoIdentifierAliases(fullName), ...(await findStoredRepoEntries(redis, fullName))])) } export async function enqueueRepoJob(fullName: string): Promise { const redis = await getRedisClient() - const key = queueDedupeKey(fullName) + const canonicalFullName = canonicalizeRepoFullName(fullName) + const existingEntries = await findStoredRepoEntries(redis, canonicalFullName) + + if (existingEntries.length > 0) { + await redis.set(queueDedupeKey(canonicalFullName), "1", { + EX: REPO_QUEUE_DEDUPE_TTL_SECONDS, + }) + return false + } + + const key = queueDedupeKey(canonicalFullName) const wasQueued = await redis.set(key, "1", { NX: true, EX: REPO_QUEUE_DEDUPE_TTL_SECONDS, @@ -46,7 +76,7 @@ export async function enqueueRepoJob(fullName: string): Promise { return false } - await redis.lPush(REPO_QUEUE_KEY, fullName) + await redis.lPush(REPO_QUEUE_KEY, canonicalFullName) return true } @@ -59,14 +89,30 @@ export async function dequeueRepoJob(timeoutSeconds: number): Promise { const redis = await getRedisClient() - await redis.multi().lRem(REPO_PROCESSING_QUEUE_KEY, 1, fullName).del(queueDedupeKey(fullName)).exec() + const aliases = await loadStoredRepoAliases(redis, fullName) + const tx = redis.multi() + + for (const alias of aliases) { + tx.lRem(REPO_PROCESSING_QUEUE_KEY, 1, alias) + tx.del(queueDedupeKey(alias)) + } + + await tx.exec() } export async function requeueProcessingJob(fullName: string): Promise { const redis = await getRedisClient() - await redis.multi().lRem(REPO_PROCESSING_QUEUE_KEY, 1, fullName).lPush(REPO_QUEUE_KEY, fullName).exec() -} + const canonicalFullName = canonicalizeRepoFullName(fullName) + const aliases = await loadStoredRepoAliases(redis, fullName) + const tx = redis.multi() + for (const alias of aliases) { + tx.lRem(REPO_PROCESSING_QUEUE_KEY, 1, alias) + } + + tx.lPush(REPO_QUEUE_KEY, canonicalFullName) + await tx.exec() +} export async function listQueuedRepoJobs(): Promise { const redis = await getRedisClient() @@ -75,10 +121,19 @@ export async function listQueuedRepoJobs(): Promise { redis.lRange(REPO_PROCESSING_QUEUE_KEY, 0, -1), ]) - return Array.from(new Set([...queued, ...processing])) + return Array.from(new Set([...queued, ...processing].map((fullName) => canonicalizeRepoFullName(fullName)))) } export async function dropRepoJob(fullName: string): Promise { const redis = await getRedisClient() - await redis.multi().lRem(REPO_QUEUE_KEY, 0, fullName).lRem(REPO_PROCESSING_QUEUE_KEY, 0, fullName).del(queueDedupeKey(fullName)).exec() + const aliases = await loadStoredRepoAliases(redis, fullName) + const tx = redis.multi() + + for (const alias of aliases) { + tx.lRem(REPO_QUEUE_KEY, 0, alias) + tx.lRem(REPO_PROCESSING_QUEUE_KEY, 0, alias) + tx.del(queueDedupeKey(alias)) + } + + await tx.exec() } diff --git a/src/server/repo-key.ts b/src/server/repo-key.ts new file mode 100644 index 0000000..4d7f111 --- /dev/null +++ b/src/server/repo-key.ts @@ -0,0 +1,39 @@ +export type CanonicalRepoIdentity = { + owner: string + repo: string + fullName: string + githubUrl: string +} + +function normalizeRepoSegment(value: string): string { + return value.trim().toLowerCase() +} + +export function canonicalizeRepoIdentity(owner: string, repo: string): CanonicalRepoIdentity { + const canonicalOwner = normalizeRepoSegment(owner) + const canonicalRepo = normalizeRepoSegment(repo) + const fullName = `${canonicalOwner}/${canonicalRepo}` + + return { + owner: canonicalOwner, + repo: canonicalRepo, + fullName, + githubUrl: `https://github.com/${fullName}`, + } +} + +export function canonicalizeRepoFullName(fullName: string): string { + const [owner = "", repo = ""] = fullName.split("/", 2) + + if (!owner || !repo) { + return fullName.trim().toLowerCase() + } + + return canonicalizeRepoIdentity(owner, repo).fullName +} + +export function repoIdentifierAliases(fullName: string): string[] { + const trimmed = fullName.trim() + const canonical = canonicalizeRepoFullName(trimmed) + return Array.from(new Set([trimmed, canonical].filter((value) => value.length > 0))) +} diff --git a/src/server/reports.ts b/src/server/reports.ts index 3778b7c..45153d7 100644 --- a/src/server/reports.ts +++ b/src/server/reports.ts @@ -2,6 +2,7 @@ import { AppError } from "../core/errors.ts" import { serializeJsonSafely } from "../core/json.ts" import type { FinalReport } from "../core/types.ts" import { query } from "./database.ts" +import { canonicalizeRepoFullName, canonicalizeRepoIdentity } from "./repo-key.ts" export type RepoRetryState = "none" | "retrying" | "terminal" @@ -38,6 +39,7 @@ function appendFailureHistorySql(state: RepoRetryState, messagePlaceholder: stri } export async function getRepoRecord(fullName: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) const rows = await query( `select full_name, @@ -60,16 +62,17 @@ export async function getRepoRecord(fullName: string): Promise { - const fullName = `${owner}/${repo}` - const githubUrl = `https://github.com/${fullName}` + const canonical = canonicalizeRepoIdentity(owner, repo) await query( `insert into repo_reports ( @@ -103,23 +106,27 @@ export async function touchQueuedRepo(owner: string, repo: string, queuedNow: bo last_error_message = case when $5 = true then null else repo_reports.last_error_message end, failure_history = case when $5 = true then '[]'::jsonb else repo_reports.failure_history end, updated_at = now()`, - [fullName, owner, repo, githubUrl, queuedNow], + [canonical.fullName, canonical.owner, canonical.repo, canonical.githubUrl, queuedNow], ) } export async function markRepoProcessing(fullName: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) + await query( `update repo_reports set status = 'processing', processing_started_at = coalesce(processing_started_at, now()), error_message = null, updated_at = now() - where full_name = $1`, - [fullName], + where lower(full_name) = lower($1)`, + [canonicalFullName], ) } export async function markRepoRetrying(fullName: string, retryCount: number, nextRetryAt: string, errorMessage: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) + await query( `update repo_reports set status = 'processing', @@ -131,12 +138,13 @@ export async function markRepoRetrying(fullName: string, retryCount: number, nex error_message = $4, failure_history = ${appendFailureHistorySql("retrying", "$4")}, updated_at = now() - where full_name = $1`, - [fullName, retryCount, nextRetryAt, errorMessage], + where lower(full_name) = lower($1)`, + [canonicalFullName, retryCount, nextRetryAt, errorMessage], ) } export async function markRepoReady(report: FinalReport): Promise { + const canonicalFullName = canonicalizeRepoFullName(report.repository.fullName) const serialized = serializeJsonSafely(report) if (serialized.sanitizedPaths.length > 0) { @@ -156,8 +164,8 @@ export async function markRepoReady(report: FinalReport): Promise { next_retry_at = null, last_error_message = null, updated_at = now() - where full_name = $1`, - [report.repository.fullName, serialized.json], + where lower(full_name) = lower($1)`, + [canonicalFullName, serialized.json], ) } catch (error) { if (error instanceof Error && /invalid input syntax for type json/i.test(error.message)) { @@ -176,6 +184,8 @@ export async function markRepoReady(report: FinalReport): Promise { } export async function markRepoFailedTerminal(fullName: string, retryCount: number, errorMessage: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) + await query( `update repo_reports set status = 'failed', @@ -187,12 +197,14 @@ export async function markRepoFailedTerminal(fullName: string, retryCount: numbe error_message = $3, failure_history = ${appendFailureHistorySql("terminal", "$3")}, updated_at = now() - where full_name = $1`, - [fullName, retryCount, errorMessage], + where lower(full_name) = lower($1)`, + [canonicalFullName, retryCount, errorMessage], ) } export async function markRepoQueued(fullName: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) + await query( `update repo_reports set status = 'queued', @@ -206,7 +218,7 @@ export async function markRepoQueued(fullName: string): Promise { last_error_message = null, failure_history = '[]'::jsonb, updated_at = now() - where full_name = $1`, - [fullName], + where lower(full_name) = lower($1)`, + [canonicalFullName], ) } diff --git a/test/queue-case-normalization.test.ts b/test/queue-case-normalization.test.ts new file mode 100644 index 0000000..9e5dd57 --- /dev/null +++ b/test/queue-case-normalization.test.ts @@ -0,0 +1,161 @@ +import { beforeEach, describe, expect, mock, test } from "bun:test" + +const queueModulePath = new URL("../src/server/queue.ts?queue-case-normalization-test", import.meta.url).href + +type RedisState = { + queued: string[] + processing: string[] + keys: Map +} + +function createRedisStub(state: RedisState) { + return { + isOpen: true, + connect: async () => {}, + on: () => {}, + set: async (key: string, value: string, options?: { NX?: boolean }) => { + if (options?.NX && state.keys.has(key)) { + return null + } + + state.keys.set(key, value) + return "OK" + }, + get: async (key: string) => state.keys.get(key) ?? null, + lPush: async (key: string, value: string) => { + const list = key.includes("processing") ? state.processing : state.queued + list.unshift(value) + return list.length + }, + lLen: async (key: string) => (key.includes("processing") ? state.processing.length : state.queued.length), + lRange: async (key: string) => (key.includes("processing") ? [...state.processing] : [...state.queued]), + sendCommand: async (args: string[]) => { + if (args[0] !== "BRPOPLPUSH") { + return null + } + const queuedValue = state.queued.pop() ?? null + if (queuedValue) { + state.processing.unshift(queuedValue) + } + return queuedValue + }, + multi: () => { + const operations: Array<() => void> = [] + return { + lRem(key: string, count: number, value: string) { + operations.push(() => { + const list = key.includes("processing") ? state.processing : state.queued + if (count === 0) { + for (let index = list.length - 1; index >= 0; index -= 1) { + if (list[index] === value) { + list.splice(index, 1) + } + } + return + } + + let remaining = count + for (let index = 0; index < list.length && remaining > 0; ) { + if (list[index] === value) { + list.splice(index, 1) + remaining -= 1 + } else { + index += 1 + } + } + }) + return this + }, + del(key: string) { + operations.push(() => { + state.keys.delete(key) + }) + return this + }, + lPush(key: string, value: string) { + operations.push(() => { + const list = key.includes("processing") ? state.processing : state.queued + list.unshift(value) + }) + return this + }, + async exec() { + for (const operation of operations) { + operation() + } + return [] + }, + } + }, + } +} + +const state: RedisState = { + queued: [], + processing: [], + keys: new Map(), +} + +mock.module("redis", () => ({ + createClient: () => createRedisStub(state), +})) + +const { acknowledgeRepoJob, dropRepoJob, enqueueRepoJob, listQueuedRepoJobs, queueDedupeKey, requeueProcessingJob } = await import(queueModulePath) + +beforeEach(() => { + state.queued.length = 0 + state.processing.length = 0 + state.keys.clear() + process.env.REDIS_URL = "redis://example.test:6379" +}) + +describe("queue case normalization", () => { + test("listQueuedRepoJobs returns canonical lowercase names", async () => { + state.queued = ["Schema-Labs-Ltd/DiscoFork"] + state.processing = ["SCHEMA-LABS-LTD/Another-Repo"] + + await expect(listQueuedRepoJobs()).resolves.toEqual([ + "schema-labs-ltd/discofork", + "schema-labs-ltd/another-repo", + ]) + }) + + test("enqueueRepoJob reuses a legacy mixed-case queued entry instead of queueing a duplicate", async () => { + state.queued = ["Schema-Labs-Ltd/DiscoFork"] + + await expect(enqueueRepoJob("schema-labs-ltd/discofork")).resolves.toBe(false) + expect(state.queued).toEqual(["Schema-Labs-Ltd/DiscoFork"]) + expect(state.keys.get(queueDedupeKey("schema-labs-ltd/discofork"))).toBe("1") + }) + + test("dropRepoJob removes a legacy mixed-case queued entry when called with the canonical name", async () => { + state.queued = ["Schema-Labs-Ltd/DiscoFork"] + state.processing = ["Schema-Labs-Ltd/DiscoFork"] + state.keys.set(queueDedupeKey("Schema-Labs-Ltd/DiscoFork"), "1") + + await dropRepoJob("schema-labs-ltd/discofork") + + expect(state.queued).toEqual([]) + expect(state.processing).toEqual([]) + expect(state.keys.size).toBe(0) + }) + + test("acknowledgeRepoJob removes a legacy mixed-case processing entry when called with the canonical name", async () => { + state.processing = ["Schema-Labs-Ltd/DiscoFork"] + state.keys.set(queueDedupeKey("Schema-Labs-Ltd/DiscoFork"), "1") + + await acknowledgeRepoJob("schema-labs-ltd/discofork") + + expect(state.processing).toEqual([]) + expect(state.keys.size).toBe(0) + }) + + test("requeueProcessingJob requeues a legacy mixed-case processing entry under the canonical name", async () => { + state.processing = ["Schema-Labs-Ltd/DiscoFork"] + + await requeueProcessingJob("schema-labs-ltd/discofork") + + expect(state.processing).toEqual([]) + expect(state.queued).toEqual(["schema-labs-ltd/discofork"]) + }) +}) diff --git a/test/reports-case-normalization.test.ts b/test/reports-case-normalization.test.ts new file mode 100644 index 0000000..35a15de --- /dev/null +++ b/test/reports-case-normalization.test.ts @@ -0,0 +1,49 @@ +import { beforeEach, describe, expect, mock, test } from "bun:test" + +const databaseModulePath = new URL("../src/server/database.ts", import.meta.url).href +const reportsModulePath = new URL("../src/server/reports.ts?case-normalization-test", import.meta.url).href + +const queryCalls: Array<{ sql: string; params: unknown[] }> = [] + +mock.module(databaseModulePath, () => ({ + query: async (sql: string, params: unknown[] = []) => { + queryCalls.push({ sql, params }) + return [] as T[] + }, +})) + +const { touchQueuedRepo, markRepoProcessing, markRepoReady } = await import(reportsModulePath) + +beforeEach(() => { + queryCalls.length = 0 +}) + +describe("repo report key normalization", () => { + test("touchQueuedRepo stores canonical lowercase identifiers", async () => { + await touchQueuedRepo("Schema-Labs-Ltd", "DiscoFork", true) + + expect(queryCalls).toHaveLength(1) + expect(queryCalls[0]?.params.slice(0, 4)).toEqual([ + "schema-labs-ltd/discofork", + "schema-labs-ltd", + "discofork", + "https://github.com/schema-labs-ltd/discofork", + ]) + }) + + test("markRepoProcessing updates repo rows through the canonical lowercase key", async () => { + await markRepoProcessing("Schema-Labs-Ltd/DiscoFork") + + expect(queryCalls).toHaveLength(1) + expect(queryCalls[0]?.sql).toContain("where lower(full_name) = lower($1)") + expect(queryCalls[0]?.params).toEqual(["schema-labs-ltd/discofork"]) + }) + + test("markRepoReady canonicalizes the report repo key before persisting", async () => { + await markRepoReady({ repository: { fullName: "Schema-Labs-Ltd/DiscoFork" } } as never) + + expect(queryCalls).toHaveLength(1) + expect(queryCalls[0]?.sql).toContain("where lower(full_name) = lower($1)") + expect(queryCalls[0]?.params[0]).toBe("schema-labs-ltd/discofork") + }) +}) diff --git a/test/repository-service-page-view.test.ts b/test/repository-service-page-view.test.ts index b67592b..33ba473 100644 --- a/test/repository-service-page-view.test.ts +++ b/test/repository-service-page-view.test.ts @@ -12,16 +12,26 @@ const fetchCalls: string[] = [] let databaseEnabled = true let queueEnabled = true -let repoRecord: any = null -let statusSnapshot: any = null let fetchStatus = 200 +let repoRecords = new Map() +let statusSnapshots = new Map() + +function canonicalize(owner: string, repo: string) { + const normalizedOwner = owner.toLowerCase() + const normalizedRepo = repo.toLowerCase() + return { + owner: normalizedOwner, + repo: normalizedRepo, + fullName: `${normalizedOwner}/${normalizedRepo}`, + } +} mock.module(databaseModulePath, () => ({ databaseConfigured: () => databaseEnabled, })) mock.module(liveStatusModulePath, () => ({ - getRepoStatusSnapshot: async () => statusSnapshot, + getRepoStatusSnapshot: async (fullName: string) => statusSnapshots.get(fullName.toLowerCase()) ?? null, })) mock.module(queueModulePath, () => ({ @@ -34,14 +44,15 @@ mock.module(queueModulePath, () => ({ })) mock.module(reportsModulePath, () => ({ - getRepoRecord: async () => repoRecord, + getRepoRecord: async (fullName: string) => repoRecords.get(fullName.toLowerCase()) ?? null, touchQueuedRepo: async (owner: string, repo: string, queuedNow: boolean) => { touchCalls.push({ owner, repo, queuedNow }) - repoRecord = { - full_name: `${owner}/${repo}`, - owner, - repo, - github_url: `https://github.com/${owner}/${repo}`, + const canonical = canonicalize(owner, repo) + repoRecords.set(canonical.fullName, { + full_name: canonical.fullName, + owner: canonical.owner, + repo: canonical.repo, + github_url: `https://github.com/${canonical.fullName}`, status: "queued", report_json: null, error_message: null, @@ -49,10 +60,16 @@ mock.module(reportsModulePath, () => ({ queued_at: "2026-04-04T00:00:00Z", processing_started_at: null, cached_at: null, + retry_count: 0, + retry_state: "none", + next_retry_at: null, + last_failed_at: null, + last_error_message: null, + failure_history: [], created_at: "2026-04-04T00:00:00Z", updated_at: "2026-04-04T00:00:00Z", - } - statusSnapshot = { + }) + statusSnapshots.set(canonical.fullName, { status: "queued", queuePosition: 1, progress: null, @@ -60,7 +77,11 @@ mock.module(reportsModulePath, () => ({ queuedAt: "2026-04-04T00:00:00Z", processingStartedAt: null, cachedAt: null, - } + retryCount: 0, + retryState: "none", + nextRetryAt: null, + lastFailedAt: null, + }) }, })) @@ -70,9 +91,9 @@ const originalFetch = globalThis.fetch beforeEach(() => { databaseEnabled = true queueEnabled = true - repoRecord = null - statusSnapshot = null fetchStatus = 200 + repoRecords = new Map() + statusSnapshots = new Map() enqueueCalls.length = 0 touchCalls.length = 0 fetchCalls.length = 0 @@ -94,21 +115,56 @@ afterEach(() => { describe("repository page view loading", () => { test("queues a missing repo once and reuses stored queued state on later reads", async () => { - const firstView = await getRepositoryPageView("schema-labs-ltd", "discofork") - const secondView = await getRepositoryPageView("schema-labs-ltd", "discofork") + const firstView = await getRepositoryPageView("schema-labs-ltd", "queued-once") + const secondView = await getRepositoryPageView("schema-labs-ltd", "queued-once") expect(firstView.kind).toBe("queued") expect(secondView.kind).toBe("queued") - expect(enqueueCalls).toEqual(["schema-labs-ltd/discofork"]) - expect(touchCalls).toEqual([{ owner: "schema-labs-ltd", repo: "discofork", queuedNow: true }]) + expect(enqueueCalls).toEqual(["schema-labs-ltd/queued-once"]) + expect(touchCalls).toEqual([{ owner: "schema-labs-ltd", repo: "queued-once", queuedNow: true }]) + }) + + test("mixed-case and lowercase requests reuse one canonical queued record", async () => { + const firstView = await getRepositoryPageView("Schema-Labs-Ltd", "DiscoFork-Case") + const secondView = await getRepositoryPageView("schema-labs-ltd", "discofork-case") + + expect(firstView).toMatchObject({ + kind: "queued", + fullName: "schema-labs-ltd/discofork-case", + owner: "schema-labs-ltd", + repo: "discofork-case", + }) + expect(secondView).toMatchObject({ + kind: "queued", + fullName: "schema-labs-ltd/discofork-case", + owner: "schema-labs-ltd", + repo: "discofork-case", + }) + expect(enqueueCalls).toEqual(["schema-labs-ltd/discofork-case"]) + expect(touchCalls).toEqual([{ owner: "schema-labs-ltd", repo: "discofork-case", queuedNow: true }]) }) test("readRepositoryView stays side-effect free for uncached repos", async () => { - const view = await readRepositoryView("schema-labs-ltd", "readonly-check") + const view = await readRepositoryView("Schema-Labs-Ltd", "Readonly-Check") - expect(view.kind).toBe("queued") + expect(view).toMatchObject({ + kind: "queued", + fullName: "schema-labs-ltd/readonly-check", + owner: "schema-labs-ltd", + repo: "readonly-check", + }) expect(enqueueCalls).toEqual([]) expect(touchCalls).toEqual([]) expect(fetchCalls).toEqual([]) }) + + test("still raises RepositoryNotFoundError when GitHub returns 404", async () => { + process.env.GH_TOKEN = "token" + fetchStatus = 404 + + await expect(getRepositoryPageView("Schema-Labs-Ltd", "Missing-Repo")).rejects.toBeInstanceOf(RepositoryNotFoundError) + expect(enqueueCalls).toEqual([]) + expect(touchCalls).toEqual([]) + expect(fetchCalls).toEqual(["https://api.github.com/repos/schema-labs-ltd/missing-repo"]) + }) }) diff --git a/web/src/app/api/repo/[owner]/[repo]/status/route.ts b/web/src/app/api/repo/[owner]/[repo]/status/route.ts index 1423dfb..4d7c002 100644 --- a/web/src/app/api/repo/[owner]/[repo]/status/route.ts +++ b/web/src/app/api/repo/[owner]/[repo]/status/route.ts @@ -1,4 +1,5 @@ import { getRepoStatusSnapshot } from "@/lib/server/live-status" +import { canonicalizeRepoIdentity } from "@/lib/server/repo-key" type RouteProps = { params: Promise<{ @@ -8,22 +9,24 @@ type RouteProps = { } function sseFrame(data: unknown): Uint8Array { - return new TextEncoder().encode(`data: ${JSON.stringify(data)}\n\n`) + return new TextEncoder().encode(`data: ${JSON.stringify(data)} + +`) } export async function GET(request: Request, { params }: RouteProps) { const { owner, repo } = await params - const fullName = `${owner}/${repo}` + const canonical = canonicalizeRepoIdentity(owner, repo) const stream = new ReadableStream({ async start(controller) { let closed = false const sendSnapshot = async () => { - const snapshot = await getRepoStatusSnapshot(fullName) + const snapshot = await getRepoStatusSnapshot(canonical.fullName) controller.enqueue( sseFrame({ - fullName, + fullName: canonical.fullName, snapshot, }), ) diff --git a/web/src/lib/repository-service.ts b/web/src/lib/repository-service.ts index 732a0da..3e373ce 100644 --- a/web/src/lib/repository-service.ts +++ b/web/src/lib/repository-service.ts @@ -1,6 +1,7 @@ import { cache } from "react" import { databaseConfigured } from "./server/database" +import { canonicalizeRepoFullName, canonicalizeRepoIdentity } from "./server/repo-key" import { getRepoStatusSnapshot, type RepoProgressSnapshot } from "./server/live-status" import { enqueueRepoJob, getRedisClient, queueConfigured } from "./server/queue" import { getRepoRecord, touchQueuedRepo, type StoredReportRecord } from "./server/reports" @@ -105,9 +106,11 @@ async function readCachedRepoExistence(fullName: string): Promise { - const cached = await readCachedRepoExistence(fullName) + const canonicalFullName = canonicalizeRepoFullName(fullName) + const cached = await readCachedRepoExistence(canonicalFullName) if (cached === true) { return } if (cached === false) { - throw new RepositoryNotFoundError(fullName) + throw new RepositoryNotFoundError(canonicalFullName) } const token = githubToken() @@ -150,7 +156,7 @@ async function ensureGitHubRepositoryExists(fullName: string): Promise { return } - const response = await fetch(`https://api.github.com/repos/${fullName}`, { + const response = await fetch(`https://api.github.com/repos/${canonicalFullName}`, { headers: { Accept: "application/vnd.github+json", Authorization: `Bearer ${token}`, @@ -160,15 +166,15 @@ async function ensureGitHubRepositoryExists(fullName: string): Promise { }) if (response.status === 404) { - await writeCachedRepoExistence(fullName, false) - throw new RepositoryNotFoundError(fullName) + await writeCachedRepoExistence(canonicalFullName, false) + throw new RepositoryNotFoundError(canonicalFullName) } if (!response.ok) { return } - await writeCachedRepoExistence(fullName, true) + await writeCachedRepoExistence(canonicalFullName, true) } const mockCache = new Map([ @@ -368,22 +374,19 @@ const mockCache = new Map([ ], ]) - function queuedViewFromRecord( - owner: string, - repo: string, record: StoredReportRecord, snapshot: Awaited>, ): QueuedRepoView { - const fullName = `${owner}/${repo}` + const canonical = canonicalizeRepoIdentity(record.owner, record.repo) const queuedStatus = toQueuedStatus(snapshot?.status ?? record.status) return { kind: "queued", - owner, - repo, - fullName, - githubUrl: `https://github.com/${fullName}`, + owner: canonical.owner, + repo: canonical.repo, + fullName: canonical.fullName, + githubUrl: canonical.githubUrl, status: queuedStatus, queuedAt: toIsoString(snapshot?.queuedAt ?? record.queued_at) ?? new Date().toISOString(), queuePosition: snapshot?.queuePosition ?? null, @@ -404,14 +407,14 @@ function queuedViewFromRecord( } function fallbackQueuedView(owner: string, repo: string, queueHint: string): QueuedRepoView { - const fullName = `${owner}/${repo}` + const canonical = canonicalizeRepoIdentity(owner, repo) return { kind: "queued", - owner, - repo, - fullName, - githubUrl: `https://github.com/${fullName}`, + owner: canonical.owner, + repo: canonical.repo, + fullName: canonical.fullName, + githubUrl: canonical.githubUrl, status: "queued", queuedAt: new Date().toISOString(), queuePosition: null, @@ -426,8 +429,8 @@ function fallbackQueuedView(owner: string, repo: string, queueHint: string): Que } async function readStoredRepositoryView(owner: string, repo: string): Promise { - const fullName = `${owner}/${repo}` - const record = await getRepoRecord(fullName) + const canonical = canonicalizeRepoIdentity(owner, repo) + const record = await getRepoRecord(canonical.fullName) if (!record) { return null @@ -437,49 +440,49 @@ async function readStoredRepositoryView(owner: string, repo: string): Promise => { - const fullName = `${owner}/${repo}` + const canonical = canonicalizeRepoIdentity(owner, repo) if (databaseConfigured() && queueConfigured()) { - const storedView = await readStoredRepositoryView(owner, repo) + const storedView = await readStoredRepositoryView(canonical.owner, canonical.repo) if (storedView) { return storedView } - await ensureGitHubRepositoryExists(fullName) - return fallbackQueuedView(owner, repo, "No cached data exists yet. Open the main repository page to queue this repository for Discofork analysis.") + await ensureGitHubRepositoryExists(canonical.fullName) + return fallbackQueuedView(canonical.owner, canonical.repo, "No cached data exists yet. Open the main repository page to queue this repository for Discofork analysis.") } - const cached = mockCache.get(fullName) + const cached = mockCache.get(canonical.fullName) if (cached) { return cached } - return fallbackQueuedView(owner, repo, "No cached analysis was found. Configure DATABASE_URL and REDIS_URL to enable real queueing and cached repo views.") + return fallbackQueuedView(canonical.owner, canonical.repo, "No cached analysis was found. Configure DATABASE_URL and REDIS_URL to enable real queueing and cached repo views.") }) export const getRepositoryPageView = cache(async (owner: string, repo: string): Promise => { - const fullName = `${owner}/${repo}` + const canonical = canonicalizeRepoIdentity(owner, repo) if (databaseConfigured() && queueConfigured()) { - const storedView = await readStoredRepositoryView(owner, repo) + const storedView = await readStoredRepositoryView(canonical.owner, canonical.repo) if (storedView) { return storedView } - await ensureGitHubRepositoryExists(fullName) - const queuedNow = await enqueueRepoJob(fullName) - await touchQueuedRepo(owner, repo, queuedNow) + await ensureGitHubRepositoryExists(canonical.fullName) + const queuedNow = await enqueueRepoJob(canonical.fullName) + await touchQueuedRepo(canonical.owner, canonical.repo, queuedNow) - const refreshedView = await readStoredRepositoryView(owner, repo) - return refreshedView ?? fallbackQueuedView(owner, repo, "This repository has been queued for Discofork analysis.") + const refreshedView = await readStoredRepositoryView(canonical.owner, canonical.repo) + return refreshedView ?? fallbackQueuedView(canonical.owner, canonical.repo, "This repository has been queued for Discofork analysis.") } - return readRepositoryView(owner, repo) + return readRepositoryView(canonical.owner, canonical.repo) }) function queueHintForStatus( @@ -520,6 +523,7 @@ function toQueuedStatus(status: StoredReportRecord["status"] | "ready" | undefin } function mapStoredReportToView(record: StoredReportRecord): CachedRepoView { + const canonical = canonicalizeRepoIdentity(record.owner, record.repo) const report = record.report_json as { generatedAt?: string upstream?: { @@ -558,10 +562,10 @@ function mapStoredReportToView(record: StoredReportRecord): CachedRepoView { return { kind: "cached", - owner: record.owner, - repo: record.repo, - fullName: record.full_name, - githubUrl: record.github_url, + owner: canonical.owner, + repo: canonical.repo, + fullName: canonical.fullName, + githubUrl: canonical.githubUrl, cachedAt: toIsoString(record.cached_at ?? report.generatedAt ?? record.updated_at) ?? new Date().toISOString(), stats: { stars: upstreamMetadata.stargazerCount ?? 0, diff --git a/web/src/lib/server/live-status.ts b/web/src/lib/server/live-status.ts index cc25006..8377825 100644 --- a/web/src/lib/server/live-status.ts +++ b/web/src/lib/server/live-status.ts @@ -1,5 +1,6 @@ import { REPO_PROGRESS_PREFIX } from "./constants" import { query } from "./database" +import { canonicalizeRepoFullName, repoIdentifierAliases } from "./repo-key" import { getRedisClient, getRepoQueueState, queueConfigured } from "./queue" export type RepoProgressSnapshot = { @@ -33,26 +34,43 @@ type ProgressPayload = { updatedAt: string } +function progressKey(fullName: string): string { + return `${REPO_PROGRESS_PREFIX}${canonicalizeRepoFullName(fullName)}` +} + +function legacyProgressKey(fullName: string): string { + return `${REPO_PROGRESS_PREFIX}${fullName.trim()}` +} + async function getRedisProgress(fullName: string): Promise { if (!queueConfigured()) { return null } const client = await getRedisClient() + const canonicalFullName = canonicalizeRepoFullName(fullName) try { - const raw = await client.get(`${REPO_PROGRESS_PREFIX}${fullName}`) - if (!raw) { - return null + const canonicalRaw = await client.get(progressKey(canonicalFullName)) + if (canonicalRaw) { + return JSON.parse(canonicalRaw) as ProgressPayload } - return JSON.parse(raw) as ProgressPayload + for (const alias of repoIdentifierAliases(fullName)) { + const legacyRaw = await client.get(legacyProgressKey(alias)) + if (legacyRaw) { + return JSON.parse(legacyRaw) as ProgressPayload + } + } } catch { return null } + + return null } export async function getRepoStatusSnapshot(fullName: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) const rows = await query<{ status: "queued" | "processing" | "ready" | "failed" error_message: string | null @@ -75,8 +93,10 @@ export async function getRepoStatusSnapshot(fullName: string): Promise { } function queueDedupeKey(fullName: string): string { - return `${REPO_QUEUE_DEDUPE_PREFIX}${fullName}` + return `${REPO_QUEUE_DEDUPE_PREFIX}${canonicalizeRepoFullName(fullName)}` } export async function enqueueRepoJob(fullName: string): Promise { const redis = await getRedisClient() - const queued = await redis.set(queueDedupeKey(fullName), "1", { + const canonicalFullName = canonicalizeRepoFullName(fullName) + const queued = await redis.set(queueDedupeKey(canonicalFullName), "1", { NX: true, EX: REPO_QUEUE_DEDUPE_TTL_SECONDS, }) @@ -44,7 +46,7 @@ export async function enqueueRepoJob(fullName: string): Promise { return false } - await redis.lPush(REPO_QUEUE_KEY, fullName) + await redis.lPush(REPO_QUEUE_KEY, canonicalFullName) return true } @@ -53,17 +55,31 @@ export async function getRepoQueueState(fullName: string): Promise<{ processing: boolean }> { const redis = await getRedisClient() + const canonicalFullName = canonicalizeRepoFullName(fullName) const [queueIndex, queueLength, processingIndex] = await Promise.all([ - redis.sendCommand(["LPOS", REPO_QUEUE_KEY, fullName]), + redis.sendCommand(["LPOS", REPO_QUEUE_KEY, canonicalFullName]), redis.lLen(REPO_QUEUE_KEY), - redis.sendCommand(["LPOS", REPO_PROCESSING_QUEUE_KEY, fullName]), + redis.sendCommand(["LPOS", REPO_PROCESSING_QUEUE_KEY, canonicalFullName]), ]) + if (typeof queueIndex === "number" || typeof processingIndex === "number") { + return { + queuePosition: + typeof queueIndex === "number" && queueLength > 0 + ? Math.max(1, queueLength - queueIndex) + : null, + processing: typeof processingIndex === "number", + } + } + + const [queuedEntries, processingEntries] = await Promise.all([ + redis.lRange(REPO_QUEUE_KEY, 0, -1), + redis.lRange(REPO_PROCESSING_QUEUE_KEY, 0, -1), + ]) + const legacyQueueIndex = queuedEntries.findIndex((entry) => canonicalizeRepoFullName(entry) === canonicalFullName) + return { - queuePosition: - typeof queueIndex === "number" && queueLength > 0 - ? Math.max(1, queueLength - queueIndex) - : null, - processing: typeof processingIndex === "number", + queuePosition: legacyQueueIndex >= 0 ? Math.max(1, queuedEntries.length - legacyQueueIndex) : null, + processing: processingEntries.some((entry) => canonicalizeRepoFullName(entry) === canonicalFullName), } } diff --git a/web/src/lib/server/repo-key.ts b/web/src/lib/server/repo-key.ts new file mode 100644 index 0000000..4d7f111 --- /dev/null +++ b/web/src/lib/server/repo-key.ts @@ -0,0 +1,39 @@ +export type CanonicalRepoIdentity = { + owner: string + repo: string + fullName: string + githubUrl: string +} + +function normalizeRepoSegment(value: string): string { + return value.trim().toLowerCase() +} + +export function canonicalizeRepoIdentity(owner: string, repo: string): CanonicalRepoIdentity { + const canonicalOwner = normalizeRepoSegment(owner) + const canonicalRepo = normalizeRepoSegment(repo) + const fullName = `${canonicalOwner}/${canonicalRepo}` + + return { + owner: canonicalOwner, + repo: canonicalRepo, + fullName, + githubUrl: `https://github.com/${fullName}`, + } +} + +export function canonicalizeRepoFullName(fullName: string): string { + const [owner = "", repo = ""] = fullName.split("/", 2) + + if (!owner || !repo) { + return fullName.trim().toLowerCase() + } + + return canonicalizeRepoIdentity(owner, repo).fullName +} + +export function repoIdentifierAliases(fullName: string): string[] { + const trimmed = fullName.trim() + const canonical = canonicalizeRepoFullName(trimmed) + return Array.from(new Set([trimmed, canonical].filter((value) => value.length > 0))) +} diff --git a/web/src/lib/server/reports.ts b/web/src/lib/server/reports.ts index 2b31cd2..08839fa 100644 --- a/web/src/lib/server/reports.ts +++ b/web/src/lib/server/reports.ts @@ -1,5 +1,6 @@ import type { RepoListOrder, RepoListStatusFilter } from "../repository-list" import { query } from "./database" +import { canonicalizeRepoFullName, canonicalizeRepoIdentity } from "./repo-key" export type RepoRetryState = "none" | "retrying" | "terminal" @@ -57,6 +58,7 @@ export type RepoListStatsRecord = { } export async function getRepoRecord(fullName: string): Promise { + const canonicalFullName = canonicalizeRepoFullName(fullName) const rows = await query( `select full_name, @@ -79,16 +81,17 @@ export async function getRepoRecord(fullName: string): Promise { - const fullName = `${owner}/${repo}` - const githubUrl = `https://github.com/${fullName}` + const canonical = canonicalizeRepoIdentity(owner, repo) await query( `insert into repo_reports ( @@ -122,7 +125,7 @@ export async function touchQueuedRepo(owner: string, repo: string, queuedNow: bo last_error_message = case when $5 = true then null else repo_reports.last_error_message end, failure_history = case when $5 = true then '[]'::jsonb else repo_reports.failure_history end, updated_at = now()`, - [fullName, owner, repo, githubUrl, queuedNow], + [canonical.fullName, canonical.owner, canonical.repo, canonical.githubUrl, queuedNow], ) } @@ -209,11 +212,12 @@ export async function listFailedRepoNames(): Promise { order by updated_at desc, full_name asc`, ) - return rows.map((row) => row.full_name) + return Array.from(new Set(rows.map((row) => canonicalizeRepoFullName(row.full_name)))) } export async function markReposQueued(fullNames: string[]): Promise { - if (fullNames.length === 0) { + const canonicalFullNames = Array.from(new Set(fullNames.map((fullName) => canonicalizeRepoFullName(fullName)))) + if (canonicalFullNames.length === 0) { return } @@ -231,7 +235,7 @@ export async function markReposQueued(fullNames: string[]): Promise { failure_history = '[]'::jsonb, updated_at = now(), last_requested_at = now() - where full_name = any($1::text[])`, - [fullNames], + where lower(full_name) = any($1::text[])`, + [canonicalFullNames], ) }