Skip to content

Commit 0aa9f51

Browse files
author
SentienceDEV
committed
Fix for OpenAI models
1 parent 35471a9 commit 0aa9f51

File tree

3 files changed: +150 -3 lines changed

predicate/agents/planner_executor_agent.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1158,14 +1158,22 @@ def build_executor_prompt(
11581158
goal: str,
11591159
intent: str | None,
11601160
compact_context: str,
1161+
input_text: str | None = None,
11611162
) -> tuple[str, str]:
11621163
"""
11631164
Build system and user prompts for the Executor LLM.
11641165
1166+
Args:
1167+
goal: Human-readable goal for this step
1168+
intent: Intent hint for element selection (optional)
1169+
compact_context: Compact representation of page elements
1170+
input_text: Text to type for TYPE_AND_SUBMIT actions (optional)
1171+
11651172
Returns:
11661173
(system_prompt, user_prompt)
11671174
"""
11681175
intent_line = f"Intent: {intent}\n" if intent else ""
1176+
input_line = f"Text to type: \"{input_text}\"\n" if input_text else ""
11691177

11701178
system = """You are a careful web automation executor.
11711179
You must respond with exactly ONE action in this format:
@@ -1180,7 +1188,7 @@ def build_executor_prompt(
11801188
user = f"""You are controlling a browser via element IDs.
11811189
11821190
Goal: {goal}
1183-
{intent_line}
1191+
{intent_line}{input_line}
11841192
Elements (ID|role|text|importance|clickable|...):
11851193
{compact_context}
11861194
@@ -2311,6 +2319,7 @@ async def _scroll_to_find_element(
23112319
step.goal,
23122320
step.intent,
23132321
ctx.compact_representation,
2322+
input_text=step.input,
23142323
)
23152324
resp = self.executor.generate(
23162325
sys_prompt,
@@ -2381,6 +2390,7 @@ async def _execute_optional_substeps(
23812390
substep.goal,
23822391
substep.intent,
23832392
ctx.compact_representation,
2393+
input_text=substep.input,
23842394
)
23852395
resp = self.executor.generate(
23862396
sys_prompt,
@@ -2893,6 +2903,7 @@ async def _execute_step(
28932903
step.goal,
28942904
step.intent,
28952905
ctx.compact_representation,
2906+
input_text=step.input,
28962907
)
28972908

28982909
if self.config.verbose:
@@ -3058,6 +3069,22 @@ async def _execute_step(
30583069
if self.config.verbose:
30593070
print(f" [VERIFY] Predicate result: {'PASS' if verification_passed else 'FAIL'}", flush=True)
30603071

3072+
# For successful CLICK actions, check if a modal/drawer appeared and dismiss it
3073+
# This handles cases like Amazon's "Add Protection" drawer after Add to Cart
3074+
# where verification passes (e.g., "Proceed to checkout" button exists in drawer)
3075+
# but we need to dismiss the overlay before continuing
3076+
if verification_passed and original_action == "CLICK" and self.config.modal.enabled:
3077+
try:
3078+
post_snap = await runtime.snapshot(emit_trace=False)
3079+
pre_elements = set(getattr(el, "id", 0) for el in (ctx.snapshot.elements or []))
3080+
post_elements = set(getattr(el, "id", 0) for el in (post_snap.elements or []))
3081+
new_elements = post_elements - pre_elements
3082+
if len(new_elements) >= self.config.modal.min_new_elements:
3083+
# Significant DOM change after CLICK - might be a modal/drawer
3084+
await self._attempt_modal_dismissal(runtime, post_snap)
3085+
except Exception:
3086+
pass # Best-effort: ignore any error from the post-click snapshot or the modal dismissal attempt
3087+
30613088
# If verification failed and we have optional substeps, try them
30623089
if not verification_passed and step.optional_substeps:
30633090
substep_outcomes = await self._execute_optional_substeps(

predicate/llm_provider.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,18 @@ def generate(
186186
temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
187187
max_tokens: Maximum tokens to generate
188188
json_mode: Enable JSON response format (requires model support)
189-
**kwargs: Additional OpenAI API parameters
189+
**kwargs: Additional OpenAI API parameters (max_new_tokens is mapped to max_tokens)
190190
191191
Returns:
192192
LLMResponse object
193193
"""
194+
# Handle max_new_tokens -> max_tokens mapping for cross-provider compatibility
195+
if "max_new_tokens" in kwargs:
196+
if max_tokens is None:
197+
max_tokens = kwargs.pop("max_new_tokens")
198+
else:
199+
kwargs.pop("max_new_tokens") # max_tokens takes precedence
200+
194201
messages = []
195202
if system_prompt:
196203
messages.append({"role": "system", "content": system_prompt})
@@ -415,11 +422,19 @@ def generate(
415422
user_prompt: User query
416423
temperature: Sampling temperature
417424
max_tokens: Maximum tokens to generate (required by Anthropic)
418-
**kwargs: Additional Anthropic API parameters
425+
**kwargs: Additional Anthropic API parameters (max_new_tokens is mapped to max_tokens)
419426
420427
Returns:
421428
LLMResponse object
422429
"""
430+
# Handle max_new_tokens -> max_tokens mapping for cross-provider compatibility
431+
if "max_new_tokens" in kwargs:
432+
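# Use max_new_tokens if max_tokens equals 1024 (treated as the default); note that an explicitly passed max_tokens=1024 cannot be distinguished from the default and is overridden as well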
433+
if max_tokens == 1024:
434+
max_tokens = kwargs.pop("max_new_tokens")
435+
else:
436+
kwargs.pop("max_new_tokens") # Explicit max_tokens takes precedence
437+
423438
# Build API parameters
424439
api_params = {
425440
"model": self._model_name,

tests/unit/test_planner_executor_agent.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,79 @@
2727
PredicateSpec,
2828
RecoveryNavigationConfig,
2929
SnapshotEscalationConfig,
30+
build_executor_prompt,
3031
normalize_plan,
3132
validate_plan_smoothness,
3233
)
3334

3435

36+
# ---------------------------------------------------------------------------
37+
# Test build_executor_prompt
38+
# ---------------------------------------------------------------------------
39+
40+
41+
class TestBuildExecutorPrompt:
42+
"""Tests for the build_executor_prompt function."""
43+
44+
def test_basic_prompt_structure(self) -> None:
45+
sys_prompt, user_prompt = build_executor_prompt(
46+
goal="Click the submit button",
47+
intent=None,
48+
compact_context="123|button|Submit|100|1|0|-|0|",
49+
)
50+
assert "CLICK(<id>)" in sys_prompt
51+
assert "TYPE(<id>" in sys_prompt
52+
assert "Goal: Click the submit button" in user_prompt
53+
assert "123|button|Submit" in user_prompt
54+
55+
def test_includes_intent_when_provided(self) -> None:
56+
sys_prompt, user_prompt = build_executor_prompt(
57+
goal="Click on product",
58+
intent="Click the first product link",
59+
compact_context="456|link|Product|100|1|0|-|0|",
60+
)
61+
assert "Intent: Click the first product link" in user_prompt
62+
63+
def test_no_intent_line_when_none(self) -> None:
64+
sys_prompt, user_prompt = build_executor_prompt(
65+
goal="Click button",
66+
intent=None,
67+
compact_context="789|button|OK|100|1|0|-|0|",
68+
)
69+
assert "Intent:" not in user_prompt
70+
71+
def test_includes_input_text_when_provided(self) -> None:
72+
"""Input text should be included for TYPE_AND_SUBMIT actions."""
73+
sys_prompt, user_prompt = build_executor_prompt(
74+
goal="Search for Logitech mouse",
75+
intent=None,
76+
compact_context="167|searchbox|Search|100|1|0|-|0|",
77+
input_text="Logitech mouse",
78+
)
79+
assert 'Text to type: "Logitech mouse"' in user_prompt
80+
81+
def test_no_input_line_when_none(self) -> None:
82+
"""No input text line when not provided."""
83+
sys_prompt, user_prompt = build_executor_prompt(
84+
goal="Click button",
85+
intent=None,
86+
compact_context="123|button|Submit|100|1|0|-|0|",
87+
input_text=None,
88+
)
89+
assert "Text to type:" not in user_prompt
90+
91+
def test_includes_both_intent_and_input(self) -> None:
92+
"""Both intent and input can be present."""
93+
sys_prompt, user_prompt = build_executor_prompt(
94+
goal="Search for laptop",
95+
intent="search_box",
96+
compact_context="100|searchbox|Search|100|1|0|-|0|",
97+
input_text="laptop",
98+
)
99+
assert "Intent: search_box" in user_prompt
100+
assert 'Text to type: "laptop"' in user_prompt
101+
102+
35103
# ---------------------------------------------------------------------------
36104
# Test normalize_plan
37105
# ---------------------------------------------------------------------------
@@ -1246,3 +1314,40 @@ def test_planner_executor_config_custom_auth_boundary(self) -> None:
12461314
),
12471315
)
12481316
assert config.auth_boundary.url_patterns == ("/custom-signin",)
1317+
1318+
1319+
# ---------------------------------------------------------------------------
1320+
# Test Modal Dismissal After Successful CLICK
1321+
# ---------------------------------------------------------------------------
1322+
1323+
1324+
class TestModalDismissalAfterSuccessfulClick:
1325+
"""Tests for modal dismissal when verification passes after CLICK."""
1326+
1327+
def test_modal_dismissal_config_min_new_elements_default(self) -> None:
1328+
"""Default min_new_elements should be 5 for DOM change detection."""
1329+
config = ModalDismissalConfig()
1330+
assert config.min_new_elements == 5
1331+
1332+
def test_modal_enabled_by_default_in_planner_executor_config(self) -> None:
1333+
"""Modal dismissal should be enabled by default."""
1334+
config = PlannerExecutorConfig()
1335+
assert config.modal.enabled is True
1336+
assert config.modal.min_new_elements == 5
1337+
1338+
def test_modal_dismissal_patterns_include_no_thanks(self) -> None:
1339+
"""'no thanks' should be in default patterns for drawer dismissal."""
1340+
config = ModalDismissalConfig()
1341+
# This is the pattern that dismisses Amazon's product protection drawer
1342+
assert "no thanks" in config.dismiss_patterns
1343+
1344+
def test_modal_config_has_required_fields_for_drawer_dismissal(self) -> None:
1345+
"""Config should have all fields needed for drawer dismissal logic."""
1346+
config = ModalDismissalConfig()
1347+
# These are all used in _attempt_modal_dismissal
1348+
assert hasattr(config, "enabled")
1349+
assert hasattr(config, "dismiss_patterns")
1350+
assert hasattr(config, "dismiss_icons")
1351+
assert hasattr(config, "role_filter")
1352+
assert hasattr(config, "max_attempts")
1353+
assert hasattr(config, "min_new_elements")

0 commit comments

Comments
 (0)