From 9890fc331f252f1ae2770ffc0d0ee49f0afd2440 Mon Sep 17 00:00:00 2001 From: JOhnsonKC201 Date: Wed, 24 Jun 2026 18:02:54 -0700 Subject: [PATCH] fix: stop nameMatchBonus awarding a spurious +10 on a whitespace-only query nameMatchBonus collapsed the query to a single token (queryLower) and then hit `nameLower.startsWith(queryLower)`. For an empty/whitespace-only query, queryLower is '' and `String.startsWith('')` is always true, so every node got a flat +10 'name match' bonus despite nothing matching its name. This is reachable end-to-end: searchNodes(' ') yields text='' and a truthy raw query, so the rescoring guard `text || query` lets the whitespace-only query through to nameMatchBonus. Guard with an early `if (!queryLower) return 0;`. Adds regression tests covering empty and whitespace-only queries (fail before, pass after) plus an exact-match case to confirm real queries are unaffected. --- __tests__/context-ranking.test.ts | 22 +++++++++++++++++++++- src/search/query-utils.ts | 13 ++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/__tests__/context-ranking.test.ts b/__tests__/context-ranking.test.ts index ec9772086..cc2889845 100644 --- a/__tests__/context-ranking.test.ts +++ b/__tests__/context-ranking.test.ts @@ -16,7 +16,7 @@ import * as path from 'path'; import * as os from 'os'; import CodeGraph from '../src/index'; import { LOW_CONFIDENCE_MARKER } from '../src/context'; -import { isDistinctiveIdentifier, scorePathRelevance, deriveProjectNameTokens } from '../src/search/query-utils'; +import { isDistinctiveIdentifier, scorePathRelevance, deriveProjectNameTokens, nameMatchBonus } from '../src/search/query-utils'; describe('isDistinctiveIdentifier', () => { it('treats plain dictionary words as non-distinctive', () => { @@ -39,6 +39,26 @@ describe('isDistinctiveIdentifier', () => { }); }); +// An empty / whitespace-only query has no name to match, so the bonus must be +// 0. Before the guard, `queryLower` collapsed to '' and `nameLower.startsWith('')` +// (always true) awarded a spurious flat +10 to every node. This is reachable via +// `searchNodes(' ')`, where the rescoring guard `text || query` passes a +// whitespace-only query through to nameMatchBonus. +describe('nameMatchBonus empty/whitespace query', () => { + it('returns 0 for an empty query', () => { + expect(nameMatchBonus('authenticate', '')).toBe(0); + }); + + it('returns 0 for a whitespace-only query', () => { + expect(nameMatchBonus('authenticate', ' ')).toBe(0); + expect(nameMatchBonus('anything', '\t\n')).toBe(0); + }); + + it('still scores a real query (exact match unaffected by the guard)', () => { + expect(nameMatchBonus('authenticate', 'authenticate')).toBe(80); + }); +}); + // A single PascalCase query word (notably a project name a user naturally // includes) splits into sub-tokens that all match the SAME path segment; summed // per sub-token it boosted that path 4×, burying the rest of the query's stack diff --git a/src/search/query-utils.ts b/src/search/query-utils.ts index 1a7b121fc..91702979e 100644 --- a/src/search/query-utils.ts +++ b/src/search/query-utils.ts @@ -343,6 +343,16 @@ function matchesNonProductionDir(lowerPath: string): boolean { export function nameMatchBonus(nodeName: string, query: string): number { const nameLower = nodeName.toLowerCase(); + // Full query as a single token (for compound identifiers like "CacheBuilder") + const queryLower = query.replace(/[\s]+/g, '').toLowerCase(); + + // An empty / whitespace-only query carries no name to match against. Bail + // before the `startsWith` check below, since `anyString.startsWith('')` is + // always true and would otherwise award a spurious flat +10 to every node + // (reachable via `searchNodes(' ')`, where the rescoring guard `text || + // query` lets a whitespace-only query through). + if (!queryLower) return 0; + // Split query into word-level terms (handles "CacheBuilder build" → ["cache","builder","build"]) const rawTerms = query .replace(/([a-z])([A-Z])/g, '$1 $2') @@ -353,9 +363,6 @@ export function nameMatchBonus(nodeName: string, query: string): number { // Also keep original space-separated tokens for exact-term matching const queryTokens = query.split(/\s+/).map(t => t.toLowerCase()).filter(t => t.length >= 2); - // Full query as a single token (for compound identifiers like "CacheBuilder") - const queryLower = query.replace(/[\s]+/g, '').toLowerCase(); - // Exact match: query exactly equals the node name if (nameLower === queryLower) return 80;