diff --git a/packages/core/src/tools/index.ts b/packages/core/src/tools/index.ts
index 4cc8d567b..0c512322a 100644
--- a/packages/core/src/tools/index.ts
+++ b/packages/core/src/tools/index.ts
@@ -94,6 +94,20 @@ tools.set(
 	})
 )
 
+tools.set(
+	'hover_element_by_index',
+	tool({
+		description: 'Hover over an element by index without clicking or focusing it',
+		inputSchema: z.object({
+			index: z.int().min(0),
+		}),
+		execute: async function (this: PageAgentCore, input) {
+			const result = await this.pageController.hoverElement(input.index)
+			return result.message
+		},
+	})
+)
+
 tools.set(
 	'input_text',
 	tool({
diff --git a/packages/extension/src/agent/RemotePageController.content.ts b/packages/extension/src/agent/RemotePageController.content.ts
index 55eff28b0..fe369bf22 100644
--- a/packages/extension/src/agent/RemotePageController.content.ts
+++ b/packages/extension/src/agent/RemotePageController.content.ts
@@ -77,6 +77,7 @@ export function initPageController() {
 			case 'update_tree':
 			case 'clean_up_highlights':
 			case 'click_element':
+			case 'hover_element':
 			case 'input_text':
 			case 'select_option':
 			case 'scroll':
@@ -118,6 +119,8 @@ function getMethodName(action: string): string {
 
 		case 'click_element':
 			return 'clickElement' as const
+		case 'hover_element':
+			return 'hoverElement' as const
 		case 'input_text':
 			return 'inputText' as const
 		case 'select_option':
diff --git a/packages/extension/src/agent/RemotePageController.ts b/packages/extension/src/agent/RemotePageController.ts
index 67f6336cb..9659f3ff4 100644
--- a/packages/extension/src/agent/RemotePageController.ts
+++ b/packages/extension/src/agent/RemotePageController.ts
@@ -117,6 +117,11 @@ export class RemotePageController {
 		return res
 	}
 
+	/** Forwards the call to the remote `hover_element` DOM action. */
+	async hoverElement(...args: any[]) {
+		return this.remoteCallDomAction('hover_element', args)
+	}
+
 	async inputText(...args: any[]): Promise {
 		return this.remoteCallDomAction('input_text', args)
 	}
diff --git a/packages/page-controller/src/PageController.ts b/packages/page-controller/src/PageController.ts
index 0369185b2..1c7fd05bf 100644
--- a/packages/page-controller/src/PageController.ts
+++ b/packages/page-controller/src/PageController.ts
@@ -9,6 +9,7 @@
 import {
 	clickElement,
 	getElementByIndex,
+	hoverElement,
 	inputTextElement,
 	scrollHorizontally,
 	scrollVertically,
@@ -268,6 +269,31 @@ export class PageController extends EventTarget {
 		}
 	}
 
+	/**
+	 * Hover element by index without clicking or focusing it
+	 * @param index Index of the target element in the current selector map
+	 * @returns `success` plus a human-readable `message`; errors are caught and
+	 * reported in the result instead of being thrown
+	 */
+	async hoverElement(index: number): Promise<{ success: boolean; message: string }> {
+		try {
+			this.assertIndexed()
+			const element = getElementByIndex(this.selectorMap, index)
+			const elemText = this.elementTextMap.get(index)
+			await hoverElement(element)
+
+			return {
+				success: true,
+				message: `✅ Hovered element (${elemText ?? index}).`,
+			}
+		} catch (error) {
+			return {
+				success: false,
+				message: `❌ Failed to hover element: ${error}`,
+			}
+		}
+	}
+
 	/**
 	 * Input text into element by index
 	 */
diff --git a/packages/page-controller/src/actions.ts b/packages/page-controller/src/actions.ts
index 04ee72186..19d9d5cf7 100644
--- a/packages/page-controller/src/actions.ts
+++ b/packages/page-controller/src/actions.ts
@@ -43,6 +43,40 @@ export function getElementByIndex(
 
 let lastClickedElement: HTMLElement | null = null
 
+/** Viewport coordinates of the center of an element's bounding client rect. */
+function getElementCenter(element: Element): { x: number; y: number } {
+	const rect = element.getBoundingClientRect()
+	return {
+		x: rect.left + rect.width / 2,
+		y: rect.top + rect.height / 2,
+	}
+}
+
+/** Init dictionary for a synthetic mouse event at viewport point (x, y). */
+function mouseEventOptions(x: number, y: number): MouseEventInit {
+	return { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 }
+}
+
+/** Init dictionary for a synthetic mouse-type pointer event at viewport point (x, y). */
+function pointerEventOptions(x: number, y: number): PointerEventInit {
+	return { bubbles: true, cancelable: true, clientX: x, clientY: y, pointerType: 'mouse' }
+}
+
+/**
+ * Dispatch the hover part of the event sequence on `target`. The over events
+ * bubble; the enter events are dispatched with `bubbles: false`.
+ */
+function dispatchHoverEvents(target: HTMLElement, x: number, y: number) {
+	const pointerOpts = pointerEventOptions(x, y)
+	const mouseOpts = mouseEventOptions(x, y)
+
+	// Hover — pointer events first, then mouse events (spec order)
+	target.dispatchEvent(new PointerEvent('pointerover', pointerOpts))
+	target.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false }))
+	target.dispatchEvent(new MouseEvent('mouseover', mouseOpts))
+	target.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false }))
+}
+
 function blurLastClickedElement() {
 	if (lastClickedElement) {
 		lastClickedElement.dispatchEvent(new PointerEvent('pointerout', { bubbles: true }))
@@ -54,6 +88,48 @@ function blurLastClickedElement() {
 	}
 }
 
+/**
+ * Hit-test (x, y) in `doc` while pass-through is enabled. Pass-through is
+ * disabled again even if the hit-test throws.
+ */
+async function hitTestWithPassThrough(doc: Document, x: number, y: number) {
+	await enablePassThrough()
+	try {
+		return doc.elementFromPoint(x, y)
+	} finally {
+		await disablePassThrough()
+	}
+}
+
+/**
+ * Simulate moving the pointer over an element without clicking or focusing it.
+ *
+ * Dispatches pointerover/enter → mouseover/enter at the element's center, targeting
+ * the innermost descendant hit there (or the element itself). No out/leave events follow.
+ *
+ * @private Internal method, subject to change at any time.
+ */
+export async function hoverElement(element: HTMLElement) {
+	await scrollIntoViewIfNeeded(element)
+	const frame = element.ownerDocument.defaultView?.frameElement
+	if (frame) await scrollIntoViewIfNeeded(frame)
+
+	const { x, y } = getElementCenter(element)
+
+	await movePointerToElement(element, x, y)
+	await waitFor(0.1)
+
+	// Hit-test to find the deepest element at hover coordinates, matching
+	// real browser behavior where events target the innermost element.
+	const hitTarget = await hitTestWithPassThrough(element.ownerDocument, x, y)
+	const target =
+		hitTarget instanceof HTMLElement && element.contains(hitTarget) ? hitTarget : element
+
+	dispatchHoverEvents(target, x, y)
+
+	await waitFor(0.2)
+}
+
 /**
  * Simulate a full click following W3C Pointer Events + UI Events spec order:
  * pointerover/enter → mouseover/enter → pointerdown → mousedown → [focus] →
@@ -70,9 +146,7 @@ export async function clickElement(element: HTMLElement) {
 	const frame = element.ownerDocument.defaultView?.frameElement
 	if (frame) await scrollIntoViewIfNeeded(frame)
 
-	const rect = element.getBoundingClientRect()
-	const x = rect.left + rect.width / 2
-	const y = rect.top + rect.height / 2
+	const { x, y } = getElementCenter(element)
 
 	await movePointerToElement(element, x, y)
 	await clickPointer()
@@ -90,20 +164,10 @@ export async function clickElement(element: HTMLElement) {
 	const target =
 		hitTarget instanceof HTMLElement && element.contains(hitTarget) ? hitTarget : element
 
-	const pointerOpts = {
-		bubbles: true,
-		cancelable: true,
-		clientX: x,
-		clientY: y,
-		pointerType: 'mouse',
-	}
-	const mouseOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 }
+	const pointerOpts = pointerEventOptions(x, y)
+	const mouseOpts = mouseEventOptions(x, y)
 
-	// Hover — pointer events first, then mouse events (spec order)
-	target.dispatchEvent(new PointerEvent('pointerover', pointerOpts))
-	target.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false }))
-	target.dispatchEvent(new MouseEvent('mouseover', mouseOpts))
-	target.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false }))
+	dispatchHoverEvents(target, x, y)
 
 	// Press
 	target.dispatchEvent(new PointerEvent('pointerdown', pointerOpts))