Skip to content

Commit 35471a9

Browse files
author
SentienceDEV
committed
tweaking scroll_by
1 parent 06b6861 commit 35471a9

File tree

3 files changed

+213
-7
lines changed

3 files changed

+213
-7
lines changed

predicate/agent_runtime.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,34 @@ async def get_url(self) -> str:
326326
self._cached_url = url
327327
return url
328328

329+
async def get_viewport_height(self) -> int:
    """
    Get current viewport height in pixels.

    Two strategies are tried in order. Each one is best-effort and isolated
    from the other, so a failure in the first does not prevent the second
    from running:

    1. ``backend.refresh_page_info()`` when the backend provides it, reading
       the ``height`` attribute of the returned object.
    2. ``backend.eval("window.innerHeight")`` when the backend provides it.

    Returns:
        Viewport height in pixels, or 800 as fallback if unavailable
    """
    # Strategy 1: refresh_page_info (PlaywrightBackend)
    try:
        refresh_fn = getattr(self.backend, "refresh_page_info", None)
        if callable(refresh_fn):
            info = await refresh_fn()
            height = getattr(info, "height", None)
            if height and height > 0:
                return int(height)
    except Exception:
        # Deliberately swallowed: this lookup is advisory, and the
        # JavaScript strategy below must still get its chance.
        # (On Python 3.8+ task cancellation is a BaseException and
        # therefore still propagates.)
        pass

    # Strategy 2: evaluate JavaScript directly
    try:
        eval_fn = getattr(self.backend, "eval", None)
        if callable(eval_fn):
            height = await eval_fn("window.innerHeight")
            if height and height > 0:
                return int(height)
    except Exception:
        # Deliberately swallowed: fall through to the static default.
        pass

    # Fallback to reasonable default
    return 800
356+
329357
# -------------------------------------------------------------------------
330358
# Action methods for PlannerExecutorAgent compatibility
331359
# -------------------------------------------------------------------------

predicate/agents/planner_executor_agent.py

Lines changed: 73 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,9 @@ class SnapshotEscalationConfig:
218218
219219
# Enable scroll-after-escalation to find elements below/above viewport
220220
config = SnapshotEscalationConfig(scroll_after_escalation=True, scroll_directions=("down", "up"))
221+
222+
# Custom scroll amount as fraction of viewport height (default: 0.4 = 40%)
223+
config = SnapshotEscalationConfig(scroll_viewport_fraction=0.5) # 50% of viewport
221224
"""
222225

223226
enabled: bool = True
@@ -228,6 +231,7 @@ class SnapshotEscalationConfig:
228231
scroll_after_escalation: bool = True
229232
scroll_max_attempts: int = 3 # Max scrolls per direction
230233
scroll_directions: tuple[str, ...] = ("down", "up") # Directions to try
234+
scroll_viewport_fraction: float = 0.4 # Scroll by 40% of viewport height (adaptive to screen size)
231235

232236

233237
@dataclass(frozen=True)
@@ -455,8 +459,9 @@ class AuthBoundaryConfig:
455459
"/log-in",
456460
"/auth",
457461
"/authenticate",
458-
"/ap/signin", # Amazon
459-
"/ap/register", # Amazon
462+
"/ap/signin", # Amazon sign-in
463+
"/ap/register", # Amazon registration
464+
"/ax/claim", # Amazon CAPTCHA/verification
460465
"/account/login",
461466
"/accounts/login",
462467
"/user/login",
@@ -1866,6 +1871,9 @@ async def _snapshot_with_escalation(
18661871
break
18671872

18681873
# Check element count - if sufficient, no need to escalate
1874+
# NOTE: Limit escalation is based on element COUNT only, not on whether
1875+
# a specific target element was found. Intent heuristics are only used
1876+
# for scroll-after-escalation AFTER limit escalation is exhausted.
18691877
elements = getattr(snap, "elements", []) or []
18701878
if len(elements) >= 10:
18711879
break
@@ -1909,15 +1917,35 @@ async def _snapshot_with_escalation(
19091917
if self.config.verbose:
19101918
print(f" [SNAPSHOT-ESCALATION] Target element not found, trying scroll-after-escalation...", flush=True)
19111919

1920+
# Get viewport height and calculate scroll delta
1921+
viewport_height = await runtime.get_viewport_height()
1922+
scroll_delta = viewport_height * cfg.scroll_viewport_fraction
1923+
19121924
for direction in cfg.scroll_directions:
1925+
# Map direction to dy (pixels): down=positive, up=negative
1926+
scroll_dy = scroll_delta if direction == "down" else -scroll_delta
1927+
19131928
for scroll_num in range(cfg.scroll_max_attempts):
19141929
if self.config.verbose:
19151930
print(f" [SNAPSHOT-ESCALATION] Scrolling {direction} ({scroll_num + 1}/{cfg.scroll_max_attempts})...", flush=True)
19161931

1917-
# Scroll
1918-
await runtime.scroll(direction)
1932+
# Scroll with deterministic verification
1933+
# scroll_by() returns False if scroll had no effect (reached page boundary)
1934+
scroll_effective = await runtime.scroll_by(
1935+
dy=scroll_dy,
1936+
verify=True,
1937+
min_delta_px=50.0,
1938+
js_fallback=True,
1939+
required=False, # Don't fail the task if scroll doesn't work
1940+
timeout_s=5.0,
1941+
)
19191942

1920-
# Wait for stabilization
1943+
if not scroll_effective:
1944+
if self.config.verbose:
1945+
print(f" [SNAPSHOT-ESCALATION] Scroll {direction} had no effect (reached boundary), skipping remaining attempts", flush=True)
1946+
break # No point trying more scrolls in this direction
1947+
1948+
# Wait for stabilization after successful scroll
19211949
if self.config.stabilize_enabled:
19221950
await asyncio.sleep(self.config.stabilize_poll_s)
19231951

@@ -2778,6 +2806,40 @@ async def _execute_step(
27782806

27792807
return outcome
27802808

2809+
# Pre-step auth boundary check: stop early if on signin page without credentials
2810+
# This prevents executing login steps that would fail or use fake credentials
2811+
if self.config.auth_boundary.enabled and self.config.auth_boundary.stop_on_auth:
2812+
is_auth_page = await self._detect_auth_boundary(runtime)
2813+
if is_auth_page:
2814+
if self.config.verbose:
2815+
print(f" [AUTH] Auth boundary detected at step start - stopping gracefully", flush=True)
2816+
2817+
outcome = StepOutcome(
2818+
step_id=step.id,
2819+
goal=step.goal,
2820+
status=StepStatus.SUCCESS, # Graceful stop = success
2821+
action_taken="AUTH_BOUNDARY_REACHED",
2822+
verification_passed=True,
2823+
error=self.config.auth_boundary.auth_success_message,
2824+
duration_ms=int((time.time() - start_time) * 1000),
2825+
url_before=pre_url,
2826+
url_after=pre_url,
2827+
)
2828+
2829+
self._emit_step_end(
2830+
step_id=step_id,
2831+
step_index=step_index,
2832+
step=step,
2833+
outcome=outcome,
2834+
pre_url=pre_url,
2835+
post_url=pre_url,
2836+
llm_response=None,
2837+
snapshot_digest=None,
2838+
)
2839+
2840+
# Return special outcome that signals run completion
2841+
return outcome
2842+
27812843
# Wait for page to stabilize before snapshot
27822844
if self.config.stabilize_enabled:
27832845
await runtime.stabilize()
@@ -3243,6 +3305,12 @@ async def run(
32433305
outcome = await self._execute_step(step, runtime, step_index)
32443306
step_outcomes.append(outcome)
32453307

3308+
# Check if auth boundary was reached at step start (graceful termination)
3309+
if outcome.action_taken == "AUTH_BOUNDARY_REACHED":
3310+
if self.config.verbose:
3311+
print(f" [AUTH] Run completed at authentication boundary", flush=True)
3312+
break # Graceful termination - no error
3313+
32463314
# Record checkpoint on success (for recovery)
32473315
if outcome.status in (StepStatus.SUCCESS, StepStatus.VISION_FALLBACK):
32483316
if self._recovery_state is not None and outcome.url_after:

tests/unit/test_planner_executor_agent.py

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from typing import Any
1818

1919
from predicate.agents.planner_executor_agent import (
20+
AuthBoundaryConfig,
2021
ExecutorOverride,
2122
IntentHeuristics,
2223
ModalDismissalConfig,
@@ -687,6 +688,14 @@ def test_custom_scroll_settings(self) -> None:
687688
assert config.scroll_max_attempts == 5
688689
assert config.scroll_directions == ("down",)
689690

691+
def test_scroll_viewport_fraction_default(self) -> None:
    """A config built with no arguments scrolls by 0.4 of the viewport."""
    default_fraction = SnapshotEscalationConfig().scroll_viewport_fraction
    assert default_fraction == 0.4
694+
695+
def test_scroll_viewport_fraction_custom(self) -> None:
    """An explicitly supplied scroll fraction is stored unchanged."""
    custom_fraction = SnapshotEscalationConfig(
        scroll_viewport_fraction=0.5
    ).scroll_viewport_fraction
    assert custom_fraction == 0.5
698+
690699
def test_scroll_directions_can_be_reordered(self) -> None:
691700
# Try up first, then down
692701
config = SnapshotEscalationConfig(
@@ -788,13 +797,26 @@ def __init__(self, elements: list[MockElement], url: str = "https://example.com"
788797

789798

790799
class MockRuntime:
791-
"""Mock runtime for testing scroll-after-escalation."""
800+
"""Mock runtime for testing scroll-after-escalation and auth boundary."""
792801

793-
def __init__(self, snapshots_by_scroll: dict[int, MockSnapshot] | None = None):
802+
def __init__(
803+
self,
804+
snapshots_by_scroll: dict[int, MockSnapshot] | None = None,
805+
url: str = "https://example.com",
806+
):
794807
self.scroll_count = 0
795808
self.scroll_directions: list[str] = []
796809
self.snapshots_by_scroll = snapshots_by_scroll or {}
797810
self.default_snapshot = MockSnapshot([MockElement(1, "button", "Submit")])
811+
self._url = url
812+
813+
async def get_url(self) -> str:
    """Report the URL held by this mock, as read by auth boundary detection."""
    current_url = self._url
    return current_url
816+
817+
async def get_viewport_height(self) -> int:
    """Report a fixed mock viewport height of 800 pixels."""
    standard_height_px = 800  # Standard viewport height
    return standard_height_px
798820

799821
async def snapshot(
800822
self,
@@ -810,6 +832,23 @@ async def scroll(self, direction: str = "down") -> None:
810832
self.scroll_count += 1
811833
self.scroll_directions.append(direction)
812834

835+
async def scroll_by(
    self,
    dy: float,
    *,
    verify: bool = True,
    min_delta_px: float = 50.0,
    js_fallback: bool = True,
    required: bool = True,
    timeout_s: float = 10.0,
    **kwargs: Any,
) -> bool:
    """Mock scroll_by that records the scroll and reports it as effective.

    Only the sign of ``dy`` is used: a positive value is logged as "down",
    anything else as "up". The remaining keyword arguments are accepted and
    ignored.

    Returns:
        Always True, i.e. every scroll counts as effective in tests.
    """
    if dy > 0:
        heading = "down"
    else:
        heading = "up"

    self.scroll_count = self.scroll_count + 1
    self.scroll_directions.append(heading)

    # Scroll always effective in tests
    return True
851+
813852
async def stabilize(self) -> None:
814853
pass
815854

@@ -1136,3 +1175,74 @@ async def test_no_scroll_without_intent_heuristics(self) -> None:
11361175

11371176
# No scrolling should occur without intent_heuristics
11381177
assert runtime.scroll_count == 0
1178+
1179+
1180+
# ---------------------------------------------------------------------------
1181+
# Test AuthBoundaryConfig
1182+
# ---------------------------------------------------------------------------
1183+
1184+
1185+
class TestAuthBoundaryConfig:
    """Checks the defaults and overrides of AuthBoundaryConfig."""

    def test_default_values(self) -> None:
        """Defaults enable detection, stop on auth, and carry key patterns."""
        cfg = AuthBoundaryConfig()
        assert cfg.enabled is True
        assert cfg.stop_on_auth is True
        known_patterns = cfg.url_patterns
        assert "/signin" in known_patterns
        assert "/ap/signin" in known_patterns
        assert "/ax/claim" in known_patterns  # Amazon CAPTCHA

    def test_default_url_patterns_include_common_patterns(self) -> None:
        """Every commonly used auth path is present in the default patterns."""
        known_patterns = AuthBoundaryConfig().url_patterns
        for pattern in (
            "/signin",
            "/sign-in",
            "/login",
            "/log-in",
            "/auth",
            "/authenticate",
            "/ap/signin",  # Amazon
            "/ap/register",  # Amazon
            "/ax/claim",  # Amazon CAPTCHA
            "/account/login",
            "/accounts/login",
            "/user/login",
        ):
            assert pattern in known_patterns, f"Missing pattern: {pattern}"

    def test_can_be_disabled(self) -> None:
        """enabled=False is stored on the config."""
        assert AuthBoundaryConfig(enabled=False).enabled is False

    def test_stop_on_auth_can_be_disabled(self) -> None:
        """stop_on_auth=False is stored on the config."""
        assert AuthBoundaryConfig(stop_on_auth=False).stop_on_auth is False

    def test_custom_url_patterns(self) -> None:
        """Supplied URL patterns replace the defaults."""
        custom_patterns = ("/custom/login", "/my-signin")
        cfg = AuthBoundaryConfig(url_patterns=custom_patterns)
        assert cfg.url_patterns == ("/custom/login", "/my-signin")

    def test_custom_auth_success_message(self) -> None:
        """A supplied auth success message is stored unchanged."""
        cfg = AuthBoundaryConfig(auth_success_message="Custom: Login required")
        assert cfg.auth_success_message == "Custom: Login required"

    def test_planner_executor_config_has_auth_boundary(self) -> None:
        """PlannerExecutorConfig has an enabled auth boundary by default."""
        boundary = PlannerExecutorConfig().auth_boundary
        assert boundary is not None
        assert boundary.enabled is True

    def test_planner_executor_config_custom_auth_boundary(self) -> None:
        """A custom AuthBoundaryConfig passed to PlannerExecutorConfig is kept."""
        boundary = AuthBoundaryConfig(
            enabled=True,
            url_patterns=("/custom-signin",),
            stop_on_auth=True,
        )
        planner_cfg = PlannerExecutorConfig(auth_boundary=boundary)
        assert planner_cfg.auth_boundary.url_patterns == ("/custom-signin",)

0 commit comments

Comments
 (0)