Skip to content

Commit 69c0fd3

Browse files
committed
Add functionality to remember information artificially, and add additional test coverage across supported platforms
1 parent 22fd98e commit 69c0fd3

File tree

21 files changed

+175
-1480
lines changed

21 files changed

+175
-1480
lines changed

.eslintignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,7 @@ node_modules
55
standalone
66
templates
77
.firebase
8+
scripts/agent-evals/output
9+
scripts/agent-evals/node_modules
10+
scripts/agent-evals/lib
11+
scripts/agent-evals/templates

.prettierignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
/scripts/frameworks-tests/vite-project/**
66
/scripts/webframeworks-deploy-tests/angular/**
77
/scripts/webframeworks-deploy-tests/nextjs/**
8+
/scripts/agent-evals/output/**
89
/src/frameworks/docs/**
910
/prompts
1011

npm-shrinkwrap.json

Lines changed: 8 additions & 1421 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,6 @@
241241
"eslint-plugin-brikke": "^2.2.2",
242242
"eslint-plugin-jsdoc": "^48.0.1",
243243
"eslint-plugin-prettier": "^5.1.3",
244-
"firebase": "^9.16.0",
245244
"firebase-admin": "^11.5.0",
246245
"firebase-functions": "^4.3.1",
247246
"google-discovery-to-swagger": "^2.1.0",

scripts/agent-evals/src/mock/mock-tools-main.ts

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import { getFirebaseCliRoot } from "../runner/paths.js";
1212
// Path to the built MCP Tools implementation in the Firebase CLI, relative to
1313
// the repo's root
1414
const MCP_TOOLS_INDEX_PATH = "lib/mcp/tools/index.js";
15-
const CONFIGSTORE_INDEX_PATH = "lib/mcp/configstore.js";
1615
const LOG_FILE_PATH = path.join(os.homedir(), "Desktop", "agent_evals_mock_logs.txt");
1716
// Enable this to turn on file logging. This can be helpful for debugging
1817
// because console logs get swallowed
@@ -23,19 +22,6 @@ const originalRequire = Module.prototype.require;
2322
const requiredModule = originalRequire.apply(this, [id]);
2423
const absolutePath = Module.createRequire(this.filename).resolve(id);
2524
const pathRelativeToCliRoot = path.relative(getFirebaseCliRoot(), absolutePath);
26-
console.log(`[DEBUG] Requiring: ${pathRelativeToCliRoot} (Absolute: ${absolutePath})`);
27-
28-
// Mock configstore to avoid "Cannot find module ../package.json" error and side effects
29-
if (pathRelativeToCliRoot.endsWith(CONFIGSTORE_INDEX_PATH)) {
30-
logToFile(`Mocking configstore for: ${pathRelativeToCliRoot}`);
31-
return {
32-
configstore: {
33-
get: () => undefined,
34-
set: () => { },
35-
delete: () => { },
36-
},
37-
};
38-
}
3925

4026
if (!pathRelativeToCliRoot.endsWith(MCP_TOOLS_INDEX_PATH)) {
4127
return requiredModule;

scripts/agent-evals/src/mock/mocks/get-environment-mock.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ import { renderTemplate } from "../../../../../src/mcp/tools/core/get_environmen
33
import { toMockContent } from "../tool-mock-utils.js";
44

55
const PROJECT_DIR = "/Users/fakeuser/develop/fake-project";
6-
const IOS_APP_ID = `1:${DEFAULT_FIREBASE_PROJECT}:ios:abc123efj456`;
7-
const IOS_BUNDLE_ID = "com.firebase.fake.ios";
8-
const ANDROID_APP_ID = `1:${DEFAULT_FIREBASE_PROJECT}:android:abc123efj456`;
9-
const ANDROID_PACKAGE_NAME = "com.firebase.fake.android";
6+
export const IOS_APP_ID = `1:${DEFAULT_FIREBASE_PROJECT}:ios:abc123efj456`;
7+
export const IOS_BUNDLE_ID = "com.firebase.fake.ios";
8+
export const ANDROID_APP_ID = `1:${DEFAULT_FIREBASE_PROJECT}:android:abc123efj456`;
9+
export const ANDROID_PACKAGE_NAME = "com.firebase.fake.android";
1010

1111
const BASE_ENVIRONMENT_CONFIG = {
1212
projectId: DEFAULT_FIREBASE_PROJECT,
@@ -40,7 +40,7 @@ export const getEnvironmentWithFlutterApp = {
4040
...BASE_ENVIRONMENT_CONFIG,
4141
detectedAppIds: {
4242
[ANDROID_APP_ID]: ANDROID_PACKAGE_NAME,
43-
[IOS_APP_ID]: IOS_BUNDLE_ID
43+
[IOS_APP_ID]: IOS_BUNDLE_ID,
4444
},
4545
}),
4646
),

scripts/agent-evals/src/mock/tool-mocks.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ const allToolMocks = {
1212
nextJsWithProjectMock,
1313
getEnvironmentWithIosApp,
1414
getEnvironmentWithAndroidApp,
15-
getEnvironmentWithFlutterApp
15+
getEnvironmentWithFlutterApp,
1616
} as const;
1717

1818
export type ToolMockName = keyof typeof allToolMocks;

scripts/agent-evals/src/runner/agent-test-runner.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,23 @@ export interface AgentTestMatchers {
2222
}
2323

2424
export interface AgentTestRunner extends AgentTestMatchers {
25+
/**
26+
* The directory where the test runner is operating
27+
*/
28+
readonly runDir: string;
29+
2530
/**
2631
* Simulates typing a string and waits for the turn to complete. It types one
2732
* character at a time to avoid triggering the Gemini CLI's paste detection
2833
*/
2934
type(text: string): Promise<void>;
3035

36+
/**
37+
* Simulates a value that was remembered previously. For Gemini CLI, the
38+
* value is appended to the user's GEMINI.md file.
39+
*/
40+
remember(text: string): Promise<void>;
41+
3142
/**
3243
* Negated assertions
3344
*/

scripts/agent-evals/src/runner/gemini-cli-runner.ts

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { mkdirSync, writeFileSync, readFileSync, existsSync } from "fs";
22
import * as path from "path";
3+
import * as os from "os";
34
import { InteractiveCLI, poll } from "./interactive-cli";
45
import { AgentTestRunner, AgentTestMatchers } from "./agent-test-runner";
56
import {
@@ -13,6 +14,7 @@ import { throwFailure } from "./logging";
1314
import { getAgentEvalsRoot, RunDirectories } from "./paths";
1415
import { execSync } from "node:child_process";
1516
import { ToolMockName } from "../mock/tool-mocks";
17+
import { appendFileSync } from "node:fs";
1618

1719
const READY_PROMPT = "Type your message";
1820
const INSTALL_ID = "238efa5b-efb2-44bd-9dce-9b081532681c";
@@ -43,7 +45,8 @@ export class GeminiCliRunner implements AgentTestRunner {
4345
private readonly cli: InteractiveCLI;
4446
private readonly telemetryPath: string;
4547
private readonly telemetryTimeout = 15000;
46-
private readonly runDir: string;
48+
readonly runDir: string;
49+
private readonly userDir: string;
4750

4851
// Determines which tools to start from for this turn so we don't detect tool
4952
// calls from previous turns
@@ -99,6 +102,7 @@ export class GeminiCliRunner implements AgentTestRunner {
99102
this.writeGeminiInstallId(dirs.userDir);
100103

101104
this.runDir = dirs.runDir;
105+
this.userDir = dirs.userDir;
102106
this.cli = new InteractiveCLI("gemini", ["--yolo"], {
103107
cwd: dirs.runDir,
104108
readyPrompt: READY_PROMPT,
@@ -122,7 +126,22 @@ export class GeminiCliRunner implements AgentTestRunner {
122126
return this.cli.type(text);
123127
}
124128

129+
async remember(text: string): Promise<void> {
130+
const geminiDir = path.join(this.userDir, ".gemini");
131+
const geminiMdFile = path.join(geminiDir, "GEMINI.md");
132+
if (!existsSync(geminiDir)) {
133+
mkdirSync(geminiDir, { recursive: true });
134+
}
135+
136+
if (!existsSync(geminiMdFile)) {
137+
writeFileSync(geminiMdFile, "## Gemini Added Memories" + os.EOL);
138+
}
125139

140+
appendFileSync(geminiMdFile, text + os.EOL);
141+
await this.type("/memory refresh");
142+
// Due to https://github.com/google-gemini/gemini-cli/issues/10702, we need to start a new chat
143+
await this.type("/clear");
144+
}
126145

127146
async exit(): Promise<void> {
128147
await this.cli.kill();
@@ -155,7 +174,7 @@ export class GeminiCliRunner implements AgentTestRunner {
155174
await this.waitForTelemetryReady();
156175
let logs: string[] = [];
157176
const toolsCallsMade = await poll(() => {
158-
logs = []
177+
logs = [];
159178
const { success, messages } = this.checkToolCalls(tools);
160179
logs = [...messages];
161180
return success;
@@ -169,11 +188,10 @@ export class GeminiCliRunner implements AgentTestRunner {
169188
public async expectMemory(text: string | RegExp): Promise<void> {
170189
let logs: string[] = [];
171190
const memoryFound = await poll(() => {
172-
logs = []
191+
logs = [];
173192
const { success, messages } = this.checkMemory(text);
174193
logs = [...messages];
175-
return success
176-
194+
return success;
177195
}, this.telemetryTimeout);
178196

179197
if (!memoryFound) {
@@ -207,8 +225,8 @@ export class GeminiCliRunner implements AgentTestRunner {
207225
expectMemory: async (text: string | RegExp) => {
208226
const timeout = 1000;
209227
const found = await poll(() => {
210-
const { success } = this.checkMemory(text)
211-
return success
228+
const { success } = this.checkMemory(text);
229+
return success;
212230
}, timeout);
213231

214232
if (found) {
@@ -322,7 +340,7 @@ export class GeminiCliRunner implements AgentTestRunner {
322340
}
323341

324342
private checkMemory(text: string | RegExp): CheckResult {
325-
const geminiMdPath = path.join(this.runDir, ".gemini", "GEMINI.md");
343+
const geminiMdPath = path.join(this.userDir, ".gemini", "GEMINI.md");
326344
const messages: string[] = [];
327345
if (!existsSync(geminiMdPath)) {
328346
messages.push(`GEMINI.md file not found at ${geminiMdPath}`);

scripts/agent-evals/src/runner/paths.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import * as path from "path";
22

3-
43
export type RunDirectories = { testDir: string; runDir: string; userDir: string };
54

65
export function getAgentEvalsRoot(): string {

0 commit comments

Comments
 (0)