Skip to content

Commit bd60309

Browse files
authored
feat: incremental aria snapshot (#37795)
1 parent e553669 commit bd60309

File tree

20 files changed

+480
-80
lines changed

20 files changed

+480
-80
lines changed

packages/injected/src/ariaSnapshot.ts

Lines changed: 88 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -495,21 +495,36 @@ function matchesNodeDeep(root: AriaNode, template: AriaTemplateNode, collectAll:
495495
return results;
496496
}
497497

498-
export function renderAriaTree(ariaSnapshot: AriaSnapshot, publicOptions: AriaTreeOptions): string {
498+
// Indexes the tree rooted at `root` by ref: every node that has a `ref` is stored in `map`
// under that ref. String children are plain text and are skipped. When the same ref occurs
// more than once, the node visited last (pre-order) wins. An undefined `root` leaves `map`
// untouched. Returns `map` (a fresh one unless the caller supplied it).
function buildByRefMap(root: AriaNode | undefined, map: Map<string, AriaNode> = new Map()): Map<string, AriaNode> {
  if (!root)
    return map;
  if (root.ref)
    map.set(root.ref, root);
  (root.children || []).forEach(child => {
    if (typeof child !== 'string')
      buildByRefMap(child, map);
  });
  return map;
}
507+
508+
// Shallow comparison of the `props` bags of two nodes: true when both have the same number
// of keys and every key of `a` maps to a strictly equal value in `b`.
function arePropsEqual(a: AriaNode, b: AriaNode): boolean {
  const left = a.props;
  const right = b.props;
  const names = Object.keys(left);
  if (names.length !== Object.keys(right).length)
    return false;
  for (const name of names) {
    if (left[name] !== right[name])
      return false;
  }
  return true;
}
513+
514+
export function renderAriaTree(ariaSnapshot: AriaSnapshot, publicOptions: AriaTreeOptions, previous?: AriaSnapshot): string {
499515
const options = toInternalOptions(publicOptions);
500516
const lines: string[] = [];
501517
const includeText = options.renderStringsAsRegex ? textContributesInfo : () => true;
502518
const renderString = options.renderStringsAsRegex ? convertToBestGuessRegex : (str: string) => str;
503-
const visit = (ariaNode: AriaNode | string, parentAriaNode: AriaNode | null, indent: string, renderCursorPointer: boolean) => {
504-
if (typeof ariaNode === 'string') {
505-
if (parentAriaNode && !includeText(parentAriaNode, ariaNode))
506-
return;
507-
const text = yamlEscapeValueIfNeeded(renderString(ariaNode));
508-
if (text)
509-
lines.push(indent + '- text: ' + text);
510-
return;
511-
}
519+
const previousByRef = buildByRefMap(previous?.root);
512520

521+
// Appends a standalone `- text: ...` entry for `text` at the given indent.
// Nothing is appended when the rendered and escaped value is empty.
const visitText = (text: string, indent: string) => {
  const rendered = renderString(text);
  const yamlValue = yamlEscapeValueIfNeeded(rendered);
  if (!yamlValue)
    return;
  lines.push(`${indent}- text: ${yamlValue}`);
};
526+
527+
const createKey = (ariaNode: AriaNode, renderCursorPointer: boolean): string => {
513528
let key = ariaNode.role;
514529
// Yaml has a limit of 1024 characters per key, and we leave some space for role and attributes.
515530
if (ariaNode.name && ariaNode.name.length <= 900) {
@@ -538,41 +553,84 @@ export function renderAriaTree(ariaSnapshot: AriaSnapshot, publicOptions: AriaTr
538553
if (ariaNode.selected === true)
539554
key += ` [selected]`;
540555

541-
let inCursorPointer = false;
542556
if (ariaNode.ref) {
543557
key += ` [ref=${ariaNode.ref}]`;
544-
if (renderCursorPointer && hasPointerCursor(ariaNode)) {
545-
inCursorPointer = true;
558+
if (renderCursorPointer && hasPointerCursor(ariaNode))
546559
key += ' [cursor=pointer]';
547-
}
548560
}
561+
return key;
562+
};
563+
564+
// Returns the text of a node that consists of exactly one string child and has no props,
// i.e. a node that can be rendered on a single `key: text` line. Returns undefined for
// every other shape, including a missing node.
const getSingleInlinedTextChild = (ariaNode: AriaNode | undefined): string | undefined => {
  if (!ariaNode || ariaNode.children.length !== 1)
    return undefined;
  const onlyChild = ariaNode.children[0];
  if (typeof onlyChild !== 'string')
    return undefined;
  // A node with props is never inlined.
  return Object.keys(ariaNode.props).length ? undefined : onlyChild;
};
549567

568+
// Renders `ariaNode` and its subtree into `lines` at the given indent.
//
// - renderCursorPointer: whether `[cursor=pointer]` may still be emitted in this subtree;
//   once a node emits it, its descendants are visited with the flag turned off.
// - previousNode: positional candidate for this node's previous version, supplied by the
//   parent. It is only used for nodes without a ref; a node with a ref is always looked up
//   in `previousByRef`.
//
// Returns `{ unchanged }`: true when this node and its whole subtree match the previous
// version. When that is the case and the node has a ref, everything rendered for the
// subtree is replaced by a single `- ref=<ref> [unchanged]` line.
const visit = (ariaNode: AriaNode, indent: string, renderCursorPointer: boolean, previousNode: AriaNode | undefined): { unchanged: boolean } => {
  if (ariaNode.ref)
    previousNode = previousByRef.get(ariaNode.ref);

  // Remember where this subtree starts so it can be collapsed afterwards.
  const linesBefore = lines.length;
  const key = createKey(ariaNode, renderCursorPointer);
  const escapedKey = indent + '- ' + yamlEscapeKeyIfNeeded(key);
  const inCursorPointer = renderCursorPointer && !!ariaNode.ref && hasPointerCursor(ariaNode);
  const singleInlinedTextChild = getSingleInlinedTextChild(ariaNode);

  // Whether ariaNode's subtree is the same as previousNode's, and can be replaced with just a ref.
  // The child count is compared up front for every rendering branch: without it, a node
  // that lost all of its children (and therefore renders as a bare leaf) would still be
  // reported as unchanged as long as its key and props matched.
  let unchanged = !!previousNode &&
      previousNode.children.length === ariaNode.children.length &&
      key === createKey(previousNode, renderCursorPointer) &&
      arePropsEqual(ariaNode, previousNode);

  if (!ariaNode.children.length && !Object.keys(ariaNode.props).length) {
    // Leaf node without children.
    lines.push(escapedKey);
  } else if (singleInlinedTextChild !== undefined) {
    // Leaf node with just some text inside.
    // Unchanged when the previous node also had the same single text child.
    unchanged = unchanged && getSingleInlinedTextChild(previousNode) === singleInlinedTextChild;

    if (includeText(ariaNode, singleInlinedTextChild))
      lines.push(escapedKey + ': ' + yamlEscapeValueIfNeeded(renderString(singleInlinedTextChild)));
    else
      lines.push(escapedKey);
  } else {
    // Node with (optional) props and some children.
    lines.push(escapedKey + ':');
    // NOTE(review): the nested-indent literals below are reproduced exactly as they appear
    // in this capture (a single space). The capture may have collapsed whitespace runs;
    // confirm the intended indent width against the repository.
    for (const [name, value] of Object.entries(ariaNode.props))
      lines.push(indent + ' - /' + name + ': ' + yamlEscapeValueIfNeeded(value));

    const childIndent = indent + ' ';
    for (let childIndex = 0; childIndex < ariaNode.children.length; childIndex++) {
      const child = ariaNode.children[childIndex];
      if (typeof child === 'string') {
        // Text children are compared positionally against the previous node.
        unchanged = unchanged && previousNode?.children[childIndex] === child;
        if (includeText(ariaNode, child))
          visitText(child, childIndent);
      } else {
        const previousChild = previousNode?.children[childIndex];
        const childResult = visit(child, childIndent, renderCursorPointer && !inCursorPointer, typeof previousChild !== 'string' ? previousChild : undefined);
        unchanged = unchanged && childResult.unchanged;
      }
    }
  }

  if (unchanged && ariaNode.ref) {
    // Replace the whole subtree with a single reference.
    lines.splice(linesBefore);
    lines.push(indent + `- ref=${ariaNode.ref} [unchanged]`);
  }

  return { unchanged };
};
568626

569-
const ariaNode = ariaSnapshot.root;
570-
if (ariaNode.role === 'fragment') {
571-
// Render fragment.
572-
for (const child of ariaNode.children || [])
573-
visit(child, ariaNode, '', !!options.renderCursorPointer);
574-
} else {
575-
visit(ariaNode, null, '', !!options.renderCursorPointer);
627+
// Do not render the root fragment, just its children.
628+
const nodesToRender = ariaSnapshot.root.role === 'fragment' ? ariaSnapshot.root.children : [ariaSnapshot.root];
629+
for (const nodeToRender of nodesToRender) {
630+
if (typeof nodeToRender === 'string')
631+
visitText(nodeToRender, '');
632+
else
633+
visit(nodeToRender, '', !!options.renderCursorPointer, undefined);
576634
}
577635
return lines.join('\n');
578636
}
@@ -636,5 +694,5 @@ function textContributesInfo(node: AriaNode, text: string): boolean {
636694
}
637695

638696
function hasPointerCursor(ariaNode: AriaNode): boolean {
639-
return ariaNode.box.style?.cursor === 'pointer';
697+
return ariaNode.box.cursor === 'pointer';
640698
}

packages/injected/src/domUtils.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,28 +112,32 @@ export type Box = {
112112
visible: boolean;
113113
inline: boolean;
114114
rect?: DOMRect;
115-
style?: CSSStyleDeclaration;
115+
// Note: we do not store the CSSStyleDeclaration object, because it is a live object
116+
// and changes values over time. This does not work for caching or comparing to the
117+
// old values. Instead, store all the properties separately.
118+
cursor?: CSSStyleDeclaration['cursor'];
116119
};
117120

118121
export function computeBox(element: Element): Box {
119122
// Note: this logic should be similar to waitForDisplayedAtStablePosition() to avoid surprises.
120123
const style = getElementComputedStyle(element);
121124
if (!style)
122125
return { visible: true, inline: false };
126+
const cursor = style.cursor;
123127
if (style.display === 'contents') {
124128
// display:contents is not rendered itself, but its child nodes are.
125129
for (let child = element.firstChild; child; child = child.nextSibling) {
126130
if (child.nodeType === 1 /* Node.ELEMENT_NODE */ && isElementVisible(child as Element))
127-
return { visible: true, inline: false, style };
131+
return { visible: true, inline: false, cursor };
128132
if (child.nodeType === 3 /* Node.TEXT_NODE */ && isVisibleTextNode(child as Text))
129-
return { visible: true, inline: true, style };
133+
return { visible: true, inline: true, cursor };
130134
}
131-
return { visible: false, inline: false, style };
135+
return { visible: false, inline: false, cursor };
132136
}
133137
if (!isElementStyleVisibilityVisible(element, style))
134-
return { style, visible: false, inline: false };
138+
return { cursor, visible: false, inline: false };
135139
const rect = element.getBoundingClientRect();
136-
return { rect, style, visible: rect.width > 0 && rect.height > 0, inline: style.display === 'inline' };
140+
return { rect, cursor, visible: rect.width > 0 && rect.height > 0, inline: style.display === 'inline' };
137141
}
138142

139143
export function isElementVisible(element: Element): boolean {

packages/injected/src/injectedScript.ts

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ export class InjectedScript {
9292
readonly window: Window & typeof globalThis;
9393
readonly document: Document;
9494
readonly consoleApi: ConsoleAPI;
95-
private _lastAriaSnapshot: AriaSnapshot | undefined;
95+
private _lastAriaSnapshotForTrack = new Map<string, AriaSnapshot>();
96+
private _lastAriaSnapshotForQuery: AriaSnapshot | undefined;
9697

9798
// Recorder must use any external dependencies through InjectedScript.
9899
// Otherwise it will end up with a copy of all modules it uses, and any
@@ -299,11 +300,18 @@ export class InjectedScript {
299300
return new Set<Element>(result.map(r => r.element));
300301
}
301302

302-
ariaSnapshot(node: Node, options: AriaTreeOptions): string {
303+
ariaSnapshot(node: Node, options: AriaTreeOptions & { track?: string, incremental?: boolean }): string {
303304
if (node.nodeType !== Node.ELEMENT_NODE)
304305
throw this.createStacklessError('Can only capture aria snapshot of Element nodes.');
305-
this._lastAriaSnapshot = generateAriaTree(node as Element, options);
306-
return renderAriaTree(this._lastAriaSnapshot, options);
306+
const ariaSnapshot = generateAriaTree(node as Element, options);
307+
let previous: AriaSnapshot | undefined;
308+
if (options.incremental)
309+
previous = options.track ? this._lastAriaSnapshotForTrack.get(options.track) : this._lastAriaSnapshotForQuery;
310+
const result = renderAriaTree(ariaSnapshot, options, previous);
311+
if (options.track)
312+
this._lastAriaSnapshotForTrack.set(options.track, ariaSnapshot);
313+
this._lastAriaSnapshotForQuery = ariaSnapshot;
314+
return result;
307315
}
308316

309317
ariaSnapshotForRecorder(): { ariaSnapshot: string, refs: Map<Element, string> } {
@@ -692,7 +700,7 @@ export class InjectedScript {
692700

693701
_createAriaRefEngine() {
694702
const queryAll = (root: SelectorRoot, selector: string): Element[] => {
695-
const result = this._lastAriaSnapshot?.elements?.get(selector);
703+
const result = this._lastAriaSnapshotForQuery?.elements?.get(selector);
696704
return result && result.isConnected ? [result] : [];
697705
};
698706
return { queryAll };

packages/playwright-core/src/client/page.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -847,8 +847,8 @@ export class Page extends ChannelOwner<channels.PageChannel> implements api.Page
847847
return result.pdf;
848848
}
849849

850-
async _snapshotForAI(options: TimeoutOptions = {}): Promise<string> {
851-
const result = await this._channel.snapshotForAI({ timeout: this._timeoutSettings.timeout(options) });
850+
async _snapshotForAI(options: TimeoutOptions & { track?: string, mode?: 'full' | 'incremental' } = {}): Promise<string> {
851+
const result = await this._channel.snapshotForAI({ timeout: this._timeoutSettings.timeout(options), track: options.track, mode: options.mode });
852852
return result.snapshot;
853853
}
854854
}

packages/playwright-core/src/protocol/validator.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,6 +1465,8 @@ scheme.PageRequestsResult = tObject({
14651465
requests: tArray(tChannel(['Request'])),
14661466
});
14671467
scheme.PageSnapshotForAIParams = tObject({
1468+
track: tOptional(tString),
1469+
mode: tOptional(tEnum(['full', 'incremental'])),
14681470
timeout: tFloat,
14691471
});
14701472
scheme.PageSnapshotForAIResult = tObject({

packages/playwright-core/src/server/dispatchers/pageDispatcher.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ export class PageDispatcher extends Dispatcher<Page, channels.PageChannel, Brows
352352
}
353353

354354
async snapshotForAI(params: channels.PageSnapshotForAIParams, progress: Progress): Promise<channels.PageSnapshotForAIResult> {
355-
return { snapshot: await this._page.snapshotForAI(progress) };
355+
return { snapshot: await this._page.snapshotForAI(progress, params) };
356356
}
357357

358358
async bringToFront(params: channels.PageBringToFrontParams, progress: Progress): Promise<void> {

packages/playwright-core/src/server/page.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -859,9 +859,9 @@ export class Page extends SdkObject {
859859
await Promise.all(this.frames().map(frame => frame.hideHighlight().catch(() => {})));
860860
}
861861

862-
async snapshotForAI(progress: Progress): Promise<string> {
862+
async snapshotForAI(progress: Progress, options: { track?: string, mode?: 'full' | 'incremental' }): Promise<string> {
863863
this.lastSnapshotFrameIds = [];
864-
const snapshot = await snapshotFrameForAI(progress, this.mainFrame(), 0, this.lastSnapshotFrameIds);
864+
const snapshot = await snapshotFrameForAI(progress, this.mainFrame(), 0, this.lastSnapshotFrameIds, options);
865865
return snapshot.join('\n');
866866
}
867867
}
@@ -1037,18 +1037,18 @@ class FrameThrottler {
10371037
}
10381038
}
10391039

1040-
async function snapshotFrameForAI(progress: Progress, frame: frames.Frame, frameOrdinal: number, frameIds: string[]): Promise<string[]> {
1040+
async function snapshotFrameForAI(progress: Progress, frame: frames.Frame, frameOrdinal: number, frameIds: string[], options: { track?: string, mode?: 'full' | 'incremental' }): Promise<string[]> {
10411041
// Only await the topmost navigations, inner frames will be empty when racing.
10421042
const snapshot = await frame.retryWithProgressAndTimeouts(progress, [1000, 2000, 4000, 8000], async continuePolling => {
10431043
try {
10441044
const context = await progress.race(frame._utilityContext());
10451045
const injectedScript = await progress.race(context.injectedScript());
1046-
const snapshotOrRetry = await progress.race(injectedScript.evaluate((injected, refPrefix) => {
1046+
const snapshotOrRetry = await progress.race(injectedScript.evaluate((injected, options) => {
10471047
const node = injected.document.body;
10481048
if (!node)
10491049
return true;
1050-
return injected.ariaSnapshot(node, { mode: 'ai', refPrefix });
1051-
}, frameOrdinal ? 'f' + frameOrdinal : ''));
1050+
return injected.ariaSnapshot(node, { mode: 'ai', ...options });
1051+
}, { refPrefix: frameOrdinal ? 'f' + frameOrdinal : '', incremental: options.mode === 'incremental', track: options.track }));
10521052
if (snapshotOrRetry === true)
10531053
return continuePolling;
10541054
return snapshotOrRetry;
@@ -1080,7 +1080,7 @@ async function snapshotFrameForAI(progress: Progress, frame: frames.Frame, frame
10801080
const frameOrdinal = frameIds.length + 1;
10811081
frameIds.push(child.frame._id);
10821082
try {
1083-
const childSnapshot = await snapshotFrameForAI(progress, child.frame, frameOrdinal, frameIds);
1083+
const childSnapshot = await snapshotFrameForAI(progress, child.frame, frameOrdinal, frameIds, options);
10841084
result.push(line + ':', ...childSnapshot.map(l => leadingSpace + ' ' + l));
10851085
} catch {
10861086
result.push(line);

packages/playwright/src/mcp/browser/response.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ export class Response {
2828
private _code: string[] = [];
2929
private _images: { contentType: string, data: Buffer }[] = [];
3030
private _context: Context;
31-
private _includeSnapshot = false;
31+
private _includeSnapshot: 'none' | 'full' | 'incremental' = 'none';
3232
private _includeTabs = false;
3333
private _tabSnapshot: TabSnapshot | undefined;
3434

@@ -75,8 +75,8 @@ export class Response {
7575
return this._images;
7676
}
7777

78-
setIncludeSnapshot() {
79-
this._includeSnapshot = true;
78+
setIncludeSnapshot(full?: 'full') {
79+
this._includeSnapshot = full ?? 'incremental';
8080
}
8181

8282
setIncludeTabs() {
@@ -86,8 +86,8 @@ export class Response {
8686
async finish() {
8787
// All the async snapshotting post-action is happening here.
8888
// Everything below should race against modal states.
89-
if (this._includeSnapshot && this._context.currentTab())
90-
this._tabSnapshot = await this._context.currentTabOrDie().captureSnapshot();
89+
if (this._includeSnapshot !== 'none' && this._context.currentTab())
90+
this._tabSnapshot = await this._context.currentTabOrDie().captureSnapshot(this._includeSnapshot);
9191
for (const tab of this._context.tabs())
9292
await tab.updateTitle();
9393
}
@@ -126,7 +126,7 @@ ${this._code.join('\n')}
126126
}
127127

128128
// List browser tabs.
129-
if (this._includeSnapshot || this._includeTabs)
129+
if (this._includeSnapshot !== 'none' || this._includeTabs)
130130
response.push(...renderTabsMarkdown(this._context.tabs(), this._includeTabs));
131131

132132
// Add snapshot if provided.

packages/playwright/src/mcp/browser/tab.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,10 @@ export class Tab extends EventEmitter<TabEventsInterface> {
217217
return this._requests;
218218
}
219219

220-
async captureSnapshot(): Promise<TabSnapshot> {
220+
async captureSnapshot(mode: 'full' | 'incremental'): Promise<TabSnapshot> {
221221
let tabSnapshot: TabSnapshot | undefined;
222222
const modalStates = await this._raceAgainstModalStates(async () => {
223-
const snapshot = await this.page._snapshotForAI();
223+
const snapshot = await this.page._snapshotForAI({ mode, track: 'response' });
224224
tabSnapshot = {
225225
url: this.page.url(),
226226
title: await this.page.title(),

packages/playwright/src/mcp/browser/tools/snapshot.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ const snapshot = defineTool({
3030

3131
handle: async (context, params, response) => {
3232
await context.ensureTab();
33-
response.setIncludeSnapshot();
33+
response.setIncludeSnapshot('full');
3434
},
3535
});
3636

0 commit comments

Comments
 (0)