Fix for OpenAI models: pass input text to the executor prompt and map max_new_tokens to max_tokens

SentienceDEV · SentienceDEV · commit 0aa9f51b77d4 · 2026-03-15T22:25:56.000-07:00
diff --git a/predicate/agents/planner_executor_agent.py b/predicate/agents/planner_executor_agent.py
@@ -1158,14 +1158,22 @@ def build_executor_prompt(
     goal: str,
     intent: str | None,
     compact_context: str,
+    input_text: str | None = None,
 ) -> tuple[str, str]:
     """
     Build system and user prompts for the Executor LLM.
 
+    Args:
+        goal: Human-readable goal for this step
+        intent: Intent hint for element selection (optional)
+        compact_context: Compact representation of page elements
+        input_text: Text to type for TYPE_AND_SUBMIT actions (optional)
+
     Returns:
         (system_prompt, user_prompt)
     """
     intent_line = f"Intent: {intent}\n" if intent else ""
+    input_line = f"Text to type: \"{input_text}\"\n" if input_text else ""
 
     system = """You are a careful web automation executor.
 You must respond with exactly ONE action in this format:
@@ -1180,7 +1188,7 @@ def build_executor_prompt(
     user = f"""You are controlling a browser via element IDs.
 
 Goal: {goal}
-{intent_line}
+{intent_line}{input_line}
 Elements (ID|role|text|importance|clickable|...):
 {compact_context}
 
@@ -2311,6 +2319,7 @@ async def _scroll_to_find_element(
                     step.goal,
                     step.intent,
                     ctx.compact_representation,
+                    input_text=step.input,
                 )
                 resp = self.executor.generate(
                     sys_prompt,
@@ -2381,6 +2390,7 @@ async def _execute_optional_substeps(
                             substep.goal,
                             substep.intent,
                             ctx.compact_representation,
+                            input_text=substep.input,
                         )
                         resp = self.executor.generate(
                             sys_prompt,
@@ -2893,6 +2903,7 @@ async def _execute_step(
                         step.goal,
                         step.intent,
                         ctx.compact_representation,
+                        input_text=step.input,
                     )
 
                     if self.config.verbose:
@@ -3058,6 +3069,22 @@ async def _execute_step(
                 if self.config.verbose:
                     print(f"  [VERIFY] Predicate result: {'PASS' if verification_passed else 'FAIL'}", flush=True)
 
+                # For successful CLICK actions, check if a modal/drawer appeared and dismiss it
+                # This handles cases like Amazon's "Add Protection" drawer after Add to Cart
+                # where verification passes (e.g., "Proceed to checkout" button exists in drawer)
+                # but we need to dismiss the overlay before continuing
+                if verification_passed and original_action == "CLICK" and self.config.modal.enabled:
+                    try:
+                        post_snap = await runtime.snapshot(emit_trace=False)
+                        pre_elements = set(getattr(el, "id", 0) for el in (ctx.snapshot.elements or []))
+                        post_elements = set(getattr(el, "id", 0) for el in (post_snap.elements or []))
+                        new_elements = post_elements - pre_elements
+                        if len(new_elements) >= self.config.modal.min_new_elements:
+                            # Significant DOM change after CLICK - might be a modal/drawer
+                            await self._attempt_modal_dismissal(runtime, post_snap)
+                    except Exception:
+                        pass  # Ignore snapshot errors
+
                 # If verification failed and we have optional substeps, try them
                 if not verification_passed and step.optional_substeps:
                     substep_outcomes = await self._execute_optional_substeps(
diff --git a/predicate/llm_provider.py b/predicate/llm_provider.py
@@ -186,11 +186,18 @@ def generate(
             temperature: Sampling temperature (0.0 = deterministic, 1.0 = creative)
             max_tokens: Maximum tokens to generate
             json_mode: Enable JSON response format (requires model support)
-            **kwargs: Additional OpenAI API parameters
+            **kwargs: Additional OpenAI API parameters (max_new_tokens is mapped to max_tokens)
 
         Returns:
             LLMResponse object
         """
+        # Handle max_new_tokens -> max_tokens mapping for cross-provider compatibility
+        if "max_new_tokens" in kwargs:
+            if max_tokens is None:
+                max_tokens = kwargs.pop("max_new_tokens")
+            else:
+                kwargs.pop("max_new_tokens")  # max_tokens takes precedence
+
         messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
@@ -415,11 +422,19 @@ def generate(
             user_prompt: User query
             temperature: Sampling temperature
             max_tokens: Maximum tokens to generate (required by Anthropic)
-            **kwargs: Additional Anthropic API parameters
+            **kwargs: Additional Anthropic API parameters (max_new_tokens is mapped to max_tokens)
 
         Returns:
             LLMResponse object
         """
+        # Handle max_new_tokens -> max_tokens mapping for cross-provider compatibility
+        if "max_new_tokens" in kwargs:
+            # Use max_new_tokens value if max_tokens is still at default
+            if max_tokens == 1024:
+                max_tokens = kwargs.pop("max_new_tokens")
+            else:
+                kwargs.pop("max_new_tokens")  # Explicit max_tokens takes precedence
+
         # Build API parameters
         api_params = {
             "model": self._model_name,
diff --git a/tests/unit/test_planner_executor_agent.py b/tests/unit/test_planner_executor_agent.py
@@ -27,11 +27,79 @@
     PredicateSpec,
     RecoveryNavigationConfig,
     SnapshotEscalationConfig,
+    build_executor_prompt,
     normalize_plan,
     validate_plan_smoothness,
 )
 
 
+# ---------------------------------------------------------------------------
+# Test build_executor_prompt
+# ---------------------------------------------------------------------------
+
+
+class TestBuildExecutorPrompt:
+    """Tests for the (system, user) prompt pair returned by build_executor_prompt."""
+
+    def test_basic_prompt_structure(self) -> None:
+        sys_prompt, user_prompt = build_executor_prompt(
+            goal="Click the submit button",
+            intent=None,
+            compact_context="123|button|Submit|100|1|0|-|0|",
+        )
+        assert "CLICK(<id>)" in sys_prompt  # action formats are expected in the system prompt
+        assert "TYPE(<id>" in sys_prompt
+        assert "Goal: Click the submit button" in user_prompt  # goal and element rows go in the user prompt
+        assert "123|button|Submit" in user_prompt
+
+    def test_includes_intent_when_provided(self) -> None:
+        sys_prompt, user_prompt = build_executor_prompt(
+            goal="Click on product",
+            intent="Click the first product link",
+            compact_context="456|link|Product|100|1|0|-|0|",
+        )
+        assert "Intent: Click the first product link" in user_prompt
+
+    def test_no_intent_line_when_none(self) -> None:
+        sys_prompt, user_prompt = build_executor_prompt(
+            goal="Click button",
+            intent=None,
+            compact_context="789|button|OK|100|1|0|-|0|",
+        )
+        assert "Intent:" not in user_prompt
+
+    def test_includes_input_text_when_provided(self) -> None:
+        """A provided input_text should appear in the user prompt as a quoted 'Text to type:' line."""
+        sys_prompt, user_prompt = build_executor_prompt(
+            goal="Search for Logitech mouse",
+            intent=None,
+            compact_context="167|searchbox|Search|100|1|0|-|0|",
+            input_text="Logitech mouse",
+        )
+        assert 'Text to type: "Logitech mouse"' in user_prompt
+
+    def test_no_input_line_when_none(self) -> None:
+        """The user prompt should omit the 'Text to type:' line when input_text is None."""
+        sys_prompt, user_prompt = build_executor_prompt(
+            goal="Click button",
+            intent=None,
+            compact_context="123|button|Submit|100|1|0|-|0|",
+            input_text=None,
+        )
+        assert "Text to type:" not in user_prompt
+
+    def test_includes_both_intent_and_input(self) -> None:
+        """The intent line and the input text line should both appear when both are given."""
+        sys_prompt, user_prompt = build_executor_prompt(
+            goal="Search for laptop",
+            intent="search_box",
+            compact_context="100|searchbox|Search|100|1|0|-|0|",
+            input_text="laptop",
+        )
+        assert "Intent: search_box" in user_prompt
+        assert 'Text to type: "laptop"' in user_prompt
+
+
 # ---------------------------------------------------------------------------
 # Test normalize_plan
 # ---------------------------------------------------------------------------
@@ -1246,3 +1314,40 @@ def test_planner_executor_config_custom_auth_boundary(self) -> None:
             ),
         )
         assert config.auth_boundary.url_patterns == ("/custom-signin",)
+
+
+# ---------------------------------------------------------------------------
+# Test Modal Dismissal After Successful CLICK
+# ---------------------------------------------------------------------------
+
+
+class TestModalDismissalAfterSuccessfulClick:
+    """Tests for the modal config defaults that post-CLICK modal dismissal relies on."""
+
+    def test_modal_dismissal_config_min_new_elements_default(self) -> None:
+        """Default min_new_elements should be 5 for DOM change detection."""
+        config = ModalDismissalConfig()
+        assert config.min_new_elements == 5
+
+    def test_modal_enabled_by_default_in_planner_executor_config(self) -> None:
+        """Modal dismissal should be enabled by default, with min_new_elements of 5."""
+        config = PlannerExecutorConfig()
+        assert config.modal.enabled is True
+        assert config.modal.min_new_elements == 5
+
+    def test_modal_dismissal_patterns_include_no_thanks(self) -> None:
+        """'no thanks' should be in default patterns for drawer dismissal."""
+        config = ModalDismissalConfig()
+        # This is the pattern that dismisses Amazon's product protection drawer
+        assert "no thanks" in config.dismiss_patterns
+
+    def test_modal_config_has_required_fields_for_drawer_dismissal(self) -> None:
+        """Config should expose every attribute the drawer dismissal logic is expected to read."""
+        config = ModalDismissalConfig()
+        # Read by the post-CLICK check or (presumably) by _attempt_modal_dismissal - verify
+        assert hasattr(config, "enabled")
+        assert hasattr(config, "dismiss_patterns")
+        assert hasattr(config, "dismiss_icons")
+        assert hasattr(config, "role_filter")
+        assert hasattr(config, "max_attempts")
+        assert hasattr(config, "min_new_elements")