Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions packages/core/src/tools/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,20 @@ tools.set(
})
)

// Tool registration: hover over an indexed element without clicking or focusing it.
tools.set(
  'hover_element_by_index',
  tool({
    description: 'Hover over an element by index without clicking or focusing it',
    // `index` must be a non-negative integer.
    inputSchema: z.object({ index: z.int().min(0) }),
    execute: async function (this: PageAgentCore, input) {
      // Delegate to the page controller and surface only its message text.
      const { message } = await this.pageController.hoverElement(input.index)
      return message
    },
  })
)

tools.set(
'input_text',
tool({
Expand Down
3 changes: 3 additions & 0 deletions packages/extension/src/agent/RemotePageController.content.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ export function initPageController() {
case 'update_tree':
case 'clean_up_highlights':
case 'click_element':
case 'hover_element':
case 'input_text':
case 'select_option':
case 'scroll':
Expand Down Expand Up @@ -118,6 +119,8 @@ function getMethodName(action: string): string {

case 'click_element':
return 'clickElement' as const
case 'hover_element':
return 'hoverElement' as const
case 'input_text':
return 'inputText' as const
case 'select_option':
Expand Down
4 changes: 4 additions & 0 deletions packages/extension/src/agent/RemotePageController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ export class RemotePageController {
return res
}

/**
 * Forward a hover request through `remoteCallDomAction` under the
 * `hover_element` action name. Arguments are passed along untouched.
 */
async hoverElement(...args: any[]): Promise<DomActionReturn> {
  const outcome = await this.remoteCallDomAction('hover_element', args)
  return outcome
}

/**
 * Forward a text-input request through `remoteCallDomAction` under the
 * `input_text` action name. Arguments are passed along untouched.
 */
async inputText(...args: any[]): Promise<DomActionReturn> {
  const outcome = await this.remoteCallDomAction('input_text', args)
  return outcome
}
Expand Down
23 changes: 23 additions & 0 deletions packages/page-controller/src/PageController.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import {
clickElement,
getElementByIndex,
hoverElement,
inputTextElement,
scrollHorizontally,
scrollVertically,
Expand Down Expand Up @@ -268,6 +269,28 @@ export class PageController extends EventTarget {
}
}

/**
 * Hover the element registered under `index`, without clicking or focusing it.
 *
 * Failures raised while resolving or hovering the element are caught and
 * reported through the returned result (`success: false` plus the
 * stringified error) instead of being rethrown.
 *
 * @param index - Index of the target element in the current selector map.
 * @returns The outcome of the hover attempt with a human-readable message.
 */
async hoverElement(index: number): Promise<ActionResult> {
  try {
    this.assertIndexed()

    const target = getElementByIndex(this.selectorMap, index)
    // Prefer the element's recorded text for the report; fall back to the raw index.
    const label = this.elementTextMap.get(index) ?? index
    await hoverElement(target)

    const message = `✅ Hovered element (${label}).`
    return { success: true, message }
  } catch (error) {
    const message = `❌ Failed to hover element: ${error}`
    return { success: false, message }
  }
}

/**
* Input text into element by index
*/
Expand Down
91 changes: 74 additions & 17 deletions packages/page-controller/src/actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,31 @@ export function getElementByIndex(

let lastClickedElement: HTMLElement | null = null

/**
 * Compute the midpoint of an element's bounding client rectangle.
 *
 * @param element - Element to measure via `getBoundingClientRect()`.
 * @returns The rectangle's center, in the same coordinate space as the rect.
 */
function getElementCenter(element: Element): { x: number; y: number } {
  const { left, top, width, height } = element.getBoundingClientRect()
  const x = left + width / 2
  const y = top + height / 2
  return { x, y }
}

/**
 * Dispatch the synthetic "pointer arrives over element" sequence on `target`:
 * pointerover → pointerenter → mouseover → mouseenter.
 *
 * Event flags follow what browsers produce for real input:
 * - `*over` events bubble, are cancelable and are composed, so listeners
 *   outside a shadow root (e.g. delegated handlers on the document) see them.
 * - `*enter` events do not bubble, are not cancelable and are not composed.
 *
 * @param target - Element the events are dispatched on.
 * @param x - Client X coordinate reported by every event.
 * @param y - Client Y coordinate reported by every event.
 */
function dispatchHoverEvents(target: HTMLElement, x: number, y: number) {
  const position = { clientX: x, clientY: y }
  const overFlags = { bubbles: true, cancelable: true, composed: true, ...position }
  const enterFlags = { bubbles: false, cancelable: false, composed: false, ...position }

  // Hover — pointer events first, then mouse events (spec order)
  target.dispatchEvent(new PointerEvent('pointerover', { ...overFlags, pointerType: 'mouse' }))
  target.dispatchEvent(new PointerEvent('pointerenter', { ...enterFlags, pointerType: 'mouse' }))
  target.dispatchEvent(new MouseEvent('mouseover', { ...overFlags, button: 0 }))
  target.dispatchEvent(new MouseEvent('mouseenter', { ...enterFlags, button: 0 }))
}

function blurLastClickedElement() {
if (lastClickedElement) {
lastClickedElement.dispatchEvent(new PointerEvent('pointerout', { bubbles: true }))
Expand All @@ -54,6 +79,35 @@ function blurLastClickedElement() {
}
}

/**
 * Simulate moving the pointer over an element without clicking or focusing it.
 *
 * Sequence: scroll the element (and its containing frame, if any) into view,
 * move the simulated pointer to the element's center, hit-test that point,
 * then dispatch the hover events on the hit element (or on `element` itself
 * when the hit lands outside of it).
 *
 * NOTE(review): no pointerout/mouseout is dispatched here for a previously
 * hovered element — confirm whether hover state is meant to persist.
 *
 * @private Internal method, subject to change at any time.
 */
export async function hoverElement(element: HTMLElement) {
  await scrollIntoViewIfNeeded(element)
  const hostFrame = element.ownerDocument.defaultView?.frameElement
  if (hostFrame) {
    await scrollIntoViewIfNeeded(hostFrame)
  }

  const center = getElementCenter(element)

  await movePointerToElement(element, center.x, center.y)
  // Short pause after the pointer move (presumably seconds — confirm against `waitFor`).
  await waitFor(0.1)

  // Hit-test for the deepest element at the hover point, mirroring real
  // browser behavior where events target the innermost element.
  // Pass-through is toggled around the hit-test (presumably so an overlay
  // does not intercept it — confirm against `enablePassThrough`).
  const ownerDoc = element.ownerDocument
  await enablePassThrough()
  const topmost = ownerDoc.elementFromPoint(center.x, center.y)
  await disablePassThrough()

  // Only accept the hit when it is an HTMLElement inside `element`.
  let hoverTarget: HTMLElement = element
  if (topmost instanceof HTMLElement && element.contains(topmost)) {
    hoverTarget = topmost
  }

  dispatchHoverEvents(hoverTarget, center.x, center.y)

  await waitFor(0.2)
}

/**
* Simulate a full click following W3C Pointer Events + UI Events spec order:
* pointerover/enter → mouseover/enter → pointerdown → mousedown → [focus] →
Expand All @@ -70,9 +124,7 @@ export async function clickElement(element: HTMLElement) {
const frame = element.ownerDocument.defaultView?.frameElement
if (frame) await scrollIntoViewIfNeeded(frame)

const rect = element.getBoundingClientRect()
const x = rect.left + rect.width / 2
const y = rect.top + rect.height / 2
const { x, y } = getElementCenter(element)

await movePointerToElement(element, x, y)
await clickPointer()
Expand All @@ -90,23 +142,20 @@ export async function clickElement(element: HTMLElement) {
const target =
hitTarget instanceof HTMLElement && element.contains(hitTarget) ? hitTarget : element

const pointerOpts = {
bubbles: true,
cancelable: true,
clientX: x,
clientY: y,
pointerType: 'mouse',
}
const mouseOpts = { bubbles: true, cancelable: true, clientX: x, clientY: y, button: 0 }

// Hover — pointer events first, then mouse events (spec order)
target.dispatchEvent(new PointerEvent('pointerover', pointerOpts))
target.dispatchEvent(new PointerEvent('pointerenter', { ...pointerOpts, bubbles: false }))
target.dispatchEvent(new MouseEvent('mouseover', mouseOpts))
target.dispatchEvent(new MouseEvent('mouseenter', { ...mouseOpts, bubbles: false }))
dispatchHoverEvents(target, x, y)

// Press
target.dispatchEvent(new PointerEvent('pointerdown', pointerOpts))
target.dispatchEvent(
new PointerEvent('pointerdown', {
bubbles: true,
cancelable: true,
clientX: x,
clientY: y,
pointerType: 'mouse',
})
)
target.dispatchEvent(new MouseEvent('mousedown', mouseOpts))

// Focus is not part of the standard pointer/mouse event sequence
Expand All @@ -115,7 +164,15 @@ export async function clickElement(element: HTMLElement) {
element.focus({ preventScroll: true })

// Release
target.dispatchEvent(new PointerEvent('pointerup', pointerOpts))
target.dispatchEvent(
new PointerEvent('pointerup', {
bubbles: true,
cancelable: true,
clientX: x,
clientY: y,
pointerType: 'mouse',
})
)
target.dispatchEvent(new MouseEvent('mouseup', mouseOpts))

// Click — activation behavior (navigation, form submit, etc.) triggers
Expand Down