Skip to content

Commit 6282bcf

Browse files
authored
Mock MCP tools during Agent Evals (#9380)
1 parent 4445ee9 commit 6282bcf

File tree

11 files changed

+259
-13
lines changed

11 files changed

+259
-13
lines changed

scripts/agent-evals/README.md

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,3 +78,40 @@ describe("<prompt-or-tool-name>", function (this: Mocha.Suite) {
7878
});
7979
});
8080
```
81+
82+
## Adding Mocks for MCP Tools
83+
84+
Mocks applied to MCP tools will completely replace their implementation with a static output string.
85+
86+
1. Add your mocked tools to the `scripts/agent-evals/src/mock/mocks` folder, e.g. `scripts/agent-evals/src/mock/mocks/next-js-with-project-mock.ts`
87+
88+
```
89+
import { toMockContent } from "../tool-mock-utils.js";
90+
91+
export const environment_nice_day_mock = {
92+
firebase_get_environment: toMockContent("Tell the user to have a nice day"),
93+
} as const;
94+
```
95+
96+
2. Add the new set of mocked tools to the map:
97+
98+
```
99+
import { environment_nice_day_mock } from "./mocks/next-js-with-project-mock.js";
100+
101+
const allToolMocks = {
102+
// New tool mock
103+
environment_nice_day_mock,
104+
} as const;
105+
```
106+
107+
3. Start using the mock in your test:
108+
109+
Note: If you apply multiple mocks to the same tool, later values in the list will take precedence.
110+
111+
```
112+
const run: AgentTestRunner = await startAgentTest(this, {
113+
templateName: "next-app-hello-world",
114+
// Add the name of your mock here
115+
toolMocks: ["environment_nice_day_mock"],
116+
});
117+
```
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Placeholder Firebase identifiers (user, project, web app) exported as
// string constants so other modules can share one set of fake values.
// NOTE(review): the user value below looks like an email-obfuscation
// placeholder rather than a real address — confirm against the repository.
export const DEFAULT_FIREBASE_USER = "[email protected]";
2+
export const DEFAULT_FIREBASE_PROJECT_NAME = "My Project";
3+
export const DEFAULT_FIREBASE_PROJECT = "my-project";
4+
export const DEFAULT_FIREBASE_PROJECT_NUMBER = "1234321098765";
5+
export const DEFAULT_FIREBASE_WEB_APP_NAME = "My Web App";
6+
// The web app ID embeds the project number defined above.
export const DEFAULT_FIREBASE_WEB_APP_ID = `1:${DEFAULT_FIREBASE_PROJECT_NUMBER}:web:84d1de6d7ees1e0be7949c`;
7+
export const DEFAULT_FIREBASE_WEB_APP_API_KEY = "aaaaaaaa-ffff-4444-bbbb-ffffffffffff";
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { Module } from "module";
2+
import path from "path";
3+
import fs from "fs";
4+
import os from "os";
5+
import { getFirebaseCliRoot } from "../runner/paths.js";
6+
import { getToolMocks } from "./tool-mocks.js";
7+
8+
//
9+
// This file is run as a node --import parameter before the Firebase CLI to
10+
// patch the implementation for tools with the mocked implementation
11+
//
12+
13+
// Path to the built MCP Tools implementation in the Firebase CLI, relative to
14+
// the repo's root
15+
const MCP_TOOLS_INDEX_PATH = "lib/mcp/tools/index.js";
const LOG_FILE_PATH = path.join(os.homedir(), "Desktop", "agent_evals_mock_logs.txt");
// Enable this to turn on file logging. This can be helpful for debugging
// because console logs get swallowed
const ENABLE_FILE_LOGGING = false;

// Tool name -> static result, as selected by getToolMocks()
const mocks = getToolMocks();
// Computed once up front: the CLI root cannot change while the process runs
const firebaseCliRoot = getFirebaseCliRoot();

const originalRequire = Module.prototype.require;

/**
 * Replacement for `Module.prototype.require`.
 *
 * Every module is loaded through the original `require` first. Only when the
 * resolved file is the MCP tools index is the result wrapped in a Proxy whose
 * `availableTools` export swaps the `fn` of each mocked tool for a function
 * returning the static mock. All other modules are returned untouched.
 */
Module.prototype.require = function (id: string) {
  const requiredModule = originalRequire.apply(this, [id]);

  let absolutePath: string;
  try {
    absolutePath = Module.createRequire(this.filename).resolve(id);
  } catch (err) {
    // The module itself already loaded, so a failure of this extra lookup
    // must never break the CLI; just skip mocking for this module.
    logToFile(`Could not resolve "${id}" while checking for the MCP tools index: ${String(err)}`);
    return requiredModule;
  }

  // path.relative() uses the platform separator; normalize it so the
  // comparison against the POSIX-style constant also works on Windows.
  const pathRelativeToCliRoot = path
    .relative(firebaseCliRoot, absolutePath)
    .split(path.sep)
    .join("/");
  if (!pathRelativeToCliRoot.endsWith(MCP_TOOLS_INDEX_PATH)) {
    return requiredModule;
  }

  logToFile(
    `Creating proxy implementation for file: ${pathRelativeToCliRoot} with tool mocks: ${JSON.stringify(Object.keys(mocks))}`,
  );

  return new Proxy(requiredModule, {
    get(target, prop, receiver) {
      if (prop !== "availableTools") {
        return Reflect.get(target, prop, receiver);
      }

      logToFile(`Intercepting access to 'availableTools'`);

      const originalAvailableTools = Reflect.get(target, prop, receiver);
      if (typeof originalAvailableTools !== "function") {
        // Nothing callable to wrap; hand back whatever the module exposes.
        logToFile(`Error: 'availableTools' is not a function`);
        return originalAvailableTools;
      }

      return async (ctx: any, features?: string[]): Promise<any[]> => {
        const realTools = await originalAvailableTools(ctx, features);
        if (!Array.isArray(realTools)) {
          logToFile(`Error: Real tools is not an array: ${JSON.stringify(realTools)}`);
          return realTools;
        }

        return realTools.map((tool) => {
          const toolName = tool?.mcp?.name;
          // Own-property check so inherited keys (e.g. "toString") never
          // count as a mock.
          if (!Object.prototype.hasOwnProperty.call(mocks, toolName)) {
            return tool;
          }
          logToFile(`Applying mock for tool: ${toolName}`);
          return {
            ...tool,
            // The mock ignores all tool arguments and returns the static result.
            fn: async () => mocks[toolName],
          };
        });
      };
    },
  });
};
69+
70+
/**
 * Appends `message`, prefixed with an ISO timestamp, to LOG_FILE_PATH.
 * Does nothing unless ENABLE_FILE_LOGGING is on. A failed write is reported
 * on stderr together with the message that could not be written.
 */
function logToFile(message: string) {
  if (ENABLE_FILE_LOGGING) {
    const line = `[${new Date().toISOString()}] ${message}\n`;
    try {
      fs.appendFileSync(LOG_FILE_PATH, line, "utf8");
    } catch (err) {
      console.error(`[AGENT-EVALS-MOCK-ERROR] Failed to write log to ${LOG_FILE_PATH}:`, err);
      console.error(`[AGENT-EVALS-MOCK-ERROR] Original message: ${message}`);
    }
  }
}
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import {
2+
DEFAULT_FIREBASE_PROJECT,
3+
DEFAULT_FIREBASE_PROJECT_NAME,
4+
DEFAULT_FIREBASE_PROJECT_NUMBER,
5+
DEFAULT_FIREBASE_USER,
6+
DEFAULT_FIREBASE_WEB_APP_ID,
7+
DEFAULT_FIREBASE_WEB_APP_NAME,
8+
DEFAULT_FIREBASE_WEB_APP_API_KEY,
9+
} from "../../data/index.js";
10+
import { toMockContent } from "../tool-mock-utils.js";
11+
12+
// Mock set keyed by MCP tool name. Each value is the static result, built with
// toMockContent, for that tool. The DEFAULT_FIREBASE_* placeholders are
// interpolated so the individual tool outputs agree with each other.
export const nextJsWithProjectMock = {
13+
firebase_login: toMockContent(`Successfully logged in as ${DEFAULT_FIREBASE_USER}`),
14+
15+
// NOTE(review): the "Project Directory" below is a hard-coded absolute path
// that looks machine-specific — confirm whether it should be generic.
firebase_get_environment: toMockContent(`# Environment Information
16+
17+
Project Directory:
18+
/Users/samedson/Firebase/firebase-tools/scripts/agent-evals/output/2025-10-24_15-36-06-588Z/-firebase-init-backend-app-2c27e75e3e5d809c/repo
19+
Project Config Path: <NO CONFIG PRESENT>
20+
Active Project ID: ${DEFAULT_FIREBASE_PROJECT}
21+
Gemini in Firebase Terms of Service: Accepted
22+
Authenticated User: ${DEFAULT_FIREBASE_USER}
23+
Detected App IDs: <NONE>
24+
Available Project Aliases (format: '[alias]: [projectId]'): <NONE>
25+
26+
No firebase.json file was found.
27+
28+
If this project does not use Firebase services that require a firebase.json file, no action is necessary.
29+
30+
If this project uses Firebase services that require a firebase.json file, the user will most likely want to:
31+
32+
a) Change the project directory using the 'firebase_update_environment' tool to select a directory with a 'firebase.json' file in it, or
33+
b) Initialize a new Firebase project directory using the 'firebase_init' tool.
34+
35+
Confirm with the user before taking action.`),
36+
37+
firebase_update_environment: toMockContent(
38+
`- Updated active project to '${DEFAULT_FIREBASE_PROJECT}'\n`,
39+
),
40+
41+
// A single project entry built from the placeholder project constants.
firebase_list_projects: toMockContent(`
42+
- projectId: ${DEFAULT_FIREBASE_PROJECT}
43+
projectNumber: '${DEFAULT_FIREBASE_PROJECT_NUMBER}'
44+
displayName: ${DEFAULT_FIREBASE_PROJECT_NAME}
45+
name: projects/${DEFAULT_FIREBASE_PROJECT}
46+
resources:
47+
hostingSite: ${DEFAULT_FIREBASE_PROJECT}
48+
state: ACTIVE
49+
etag: 1_99999999-7777-4444-8888-dddddddddddd
50+
`),
51+
52+
// A single web app entry built from the placeholder web app constants.
firebase_list_apps: toMockContent(`
53+
- name: 'projects/${DEFAULT_FIREBASE_PROJECT}/webApps/${DEFAULT_FIREBASE_WEB_APP_ID}'
54+
displayName: ${DEFAULT_FIREBASE_WEB_APP_NAME}
55+
platform: WEB
56+
appId: '${DEFAULT_FIREBASE_WEB_APP_ID}'
57+
namespace: 000000000000000000000000000000000000000000000000
58+
apiKeyId: ${DEFAULT_FIREBASE_WEB_APP_API_KEY}
59+
state: ACTIVE
60+
expireTime: '1970-01-01T00:00:00Z'
61+
`),
62+
63+
// SDK config serialized as a one-line JSON string.
firebase_get_sdk_config: toMockContent(
64+
`{"projectId":"${DEFAULT_FIREBASE_PROJECT}","appId":"${DEFAULT_FIREBASE_WEB_APP_ID}","storageBucket":"${DEFAULT_FIREBASE_PROJECT}.firebasestorage.app","apiKey":"${DEFAULT_FIREBASE_WEB_APP_API_KEY}","authDomain":"${DEFAULT_FIREBASE_PROJECT}.firebaseapp.com","messagingSenderId":"${DEFAULT_FIREBASE_PROJECT_NUMBER}","projectNumber":"${DEFAULT_FIREBASE_PROJECT_NUMBER}","version":"2"}`,
65+
),
66+
} as const;
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import { ToolMock } from "./tool-mocks.js";
2+
3+
/**
 * Wraps `data` in a tool result holding a single text content item.
 *
 * @param data Text to return from the mocked tool. Strings are used as-is;
 *   any other value is serialized to JSON (falling back to `String(data)` for
 *   values JSON cannot represent, such as `undefined`) so that the `text`
 *   field is always a string.
 * @returns A result of the form `{ content: [{ type: "text", text }] }`.
 */
export function toMockContent(data: unknown): ToolMock {
  const text = typeof data === "string" ? data : (JSON.stringify(data, null, 2) ?? String(data));
  return { content: [{ type: "text", text }] };
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
2+
import { nextJsWithProjectMock } from "./mocks/next-js-with-project-mock.js";
3+
4+
// Static result returned for a mocked tool; an alias of the MCP SDK's CallToolResult.
export type ToolMock = CallToolResult;
5+
6+
// Registry of every named mock set. Its keys are the names accepted as ToolMockName.
const allToolMocks = {
7+
nextJsWithProjectMock,
8+
} as const;
9+
10+
// Union of the registered mock set names, derived from the registry's keys.
export type ToolMockName = keyof typeof allToolMocks;
11+
12+
/**
 * Type guard: true only when `name` is one of the registered mock set names.
 *
 * Uses an own-property check rather than the `in` operator, which also walks
 * the prototype chain and would accept names such as "toString" or
 * "constructor".
 */
function isToolMockName(name: string): name is ToolMockName {
  return Object.prototype.hasOwnProperty.call(allToolMocks, name);
}
15+
16+
/**
 * Builds the tool-name -> mock-result map from the comma-separated mock set
 * names in the `TOOL_MOCKS` environment variable.
 *
 * Sets are merged in the order listed, so when two sets mock the same tool the
 * later set wins. Unknown names are reported on stderr and skipped.
 *
 * @returns The merged mocks; an empty object when `TOOL_MOCKS` is unset or empty.
 */
export function getToolMocks(): Record<string, ToolMock> {
  const requestedNames = (process.env.TOOL_MOCKS ?? "")
    .split(",")
    .map((name) => name.trim())
    // "".split(",") yields [""]: an unset or empty TOOL_MOCKS means
    // "no mocks", not an invalid mock name.
    .filter((name) => name.length > 0);

  const mocks: Record<string, ToolMock> = {};
  for (const mockName of requestedNames) {
    if (isToolMockName(mockName)) {
      Object.assign(mocks, allToolMocks[mockName]);
    } else {
      console.error(`Invalid mock name provided: "${mockName}"`);
    }
  }
  return mocks;
}

scripts/agent-evals/src/runner/gemini-cli-runner.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ import {
1010
} from "./tool-matcher.js";
1111
import fs from "fs";
1212
import { throwFailure } from "./logging.js";
13+
import { getAgentEvalsRoot } from "./paths.js";
14+
import { execSync } from "node:child_process";
15+
import { ToolMockName } from "../mock/tool-mocks.js";
1316

1417
const READY_PROMPT = "Type your message";
1518

@@ -43,9 +46,12 @@ export class GeminiCliRunner implements AgentTestRunner {
4346
private readonly testName: string,
4447
testDir: string,
4548
runDir: string,
49+
toolMocks: ToolMockName[],
4650
) {
4751
// Create a settings file to point the CLI to a local telemetry log
4852
this.telemetryPath = path.join(testDir, "telemetry.log");
53+
const mockPath = path.resolve(path.join(getAgentEvalsRoot(), "lib/mock/mock-tools-main.js"));
54+
const firebasePath = execSync("which firebase").toString().trim();
4955
const settings = {
5056
general: {
5157
disableAutoUpdate: true,
@@ -58,8 +64,11 @@ export class GeminiCliRunner implements AgentTestRunner {
5864
},
5965
mcpServers: {
6066
firebase: {
61-
command: "firebase",
62-
args: ["experimental:mcp"],
67+
command: "node",
68+
args: ["--import", mockPath, firebasePath, "experimental:mcp"],
69+
env: {
70+
TOOL_MOCKS: `${toolMocks?.join(",") || ""}`,
71+
},
6372
},
6473
},
6574
};

scripts/agent-evals/src/runner/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { GeminiCliRunner } from "./gemini-cli-runner.js";
66
import { buildFirebaseCli, clearUserMcpServers } from "./setup.js";
77
import { addCleanup } from "../helpers/cleanup.js";
88
import { TemplateName, copyTemplate, buildTemplates } from "../template/index.js";
9+
import { ToolMockName } from "../mock/tool-mocks.js";
910

1011
export * from "./agent-test-runner.js";
1112

@@ -21,6 +22,9 @@ export interface AgentTestOptions {
2122
// Name of the template directory to copy into this test run. Leave this empty
2223
// to run the test in an empty directory
2324
templateName?: TemplateName;
25+
// List of MCP Tool mocks to apply, in order. Later mocks overwrite earlier
26+
// mocks.
27+
toolMocks?: ToolMockName[];
2428
}
2529

2630
export async function startAgentTest(
@@ -37,7 +41,7 @@ export async function startAgentTest(
3741
copyTemplate(options.templateName, runDir);
3842
}
3943

40-
const run = new GeminiCliRunner(testName, testDir, runDir);
44+
const run = new GeminiCliRunner(testName, testDir, runDir, options?.toolMocks || []);
4145
await run.waitForReadyPrompt();
4246

4347
addCleanup(async () => {
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import path from "path";
2+
import { fileURLToPath } from "url";
3+
4+
/**
 * Returns the absolute path two directory levels above the directory that
 * contains this module.
 */
export function getAgentEvalsRoot(): string {
  const here = fileURLToPath(import.meta.url);
  return path.resolve(path.dirname(here), "..", "..");
}
8+
9+
/**
 * Returns the absolute path four directory levels above the directory that
 * contains this module.
 */
export function getFirebaseCliRoot(): string {
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  const fourLevelsUp = path.join(moduleDir, "..", "..", "..", "..");
  return path.resolve(fourLevelsUp);
}

scripts/agent-evals/src/runner/setup.ts

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import { exec } from "child_process";
2-
import path from "path";
32
import { promisify } from "util";
4-
import { fileURLToPath } from "url";
3+
import { getFirebaseCliRoot } from "./paths.js";
54

65
const execPromise = promisify(exec);
76

@@ -10,13 +9,7 @@ export async function buildFirebaseCli() {
109
console.log("Skipping Firebase CLI build because process.env.SKIP_REBUILD");
1110
return;
1211
}
13-
const firebaseCliRoot = path.resolve(
14-
path.dirname(fileURLToPath(import.meta.url)),
15-
"..",
16-
"..",
17-
"..",
18-
"..",
19-
);
12+
const firebaseCliRoot = getFirebaseCliRoot();
2013
console.log(`Building Firebase CLI at ${firebaseCliRoot}`);
2114
await execPromise("./scripts/clean-install.sh", { cwd: firebaseCliRoot });
2215
}

0 commit comments

Comments
 (0)