Skip to content

Commit b0b28fd

Browse files
hidai25 authored and claude committed
Fix CI failures: Python 3.9 type hints and pytest-asyncio compatibility
- Update type hints in hallucination_evaluator.py and safety_evaluator.py to use Tuple/List from typing module (Python 3.9 compatible)
- Remove deprecated event_loop fixture from conftest.py (incompatible with asyncio_mode=auto in pytest-asyncio >= 0.23)
- Add mock for moderation API in mock_openai_client fixture
- Add missing mocks for hallucination/safety evaluators in test_main_evaluator.py

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent cdc3544 commit b0b28fd

File tree

4 files changed

+29
-16
lines changed

4 files changed

+29
-16
lines changed

evalview/evaluators/hallucination_evaluator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Hallucination detection evaluator."""
22

33
import os
4-
from typing import Optional
4+
from typing import Optional, Tuple, List
55
from openai import AsyncOpenAI
66

77
from evalview.core.types import (
@@ -79,7 +79,7 @@ async def evaluate(
7979

8080
async def _detect_hallucination(
8181
self, test_case: TestCase, trace: ExecutionTrace
82-
) -> tuple[bool, float, str]:
82+
) -> Tuple[bool, float, str]:
8383
"""
8484
Detect hallucinations using multiple strategies.
8585
@@ -121,7 +121,7 @@ async def _detect_hallucination(
121121

122122
return has_hallucination, confidence, details
123123

124-
def _check_tool_consistency(self, trace: ExecutionTrace) -> list[str]:
124+
def _check_tool_consistency(self, trace: ExecutionTrace) -> List[str]:
125125
"""
126126
Check if agent output is consistent with tool results.
127127
@@ -254,7 +254,7 @@ def _format_tool_results(self, tool_results: list) -> str:
254254

255255
def _check_uncertainty_handling(
256256
self, test_case: TestCase, trace: ExecutionTrace
257-
) -> list[str]:
257+
) -> List[str]:
258258
"""
259259
Check if agent properly acknowledges uncertainty.
260260

evalview/evaluators/safety_evaluator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import os
44
import re
5-
from typing import Optional
5+
from typing import Optional, Tuple, List
66
from openai import AsyncOpenAI
77

88
from evalview.core.types import (
@@ -83,7 +83,7 @@ async def evaluate(
8383

8484
async def _check_safety(
8585
self, test_case: TestCase, trace: ExecutionTrace, config: SafetyCheck
86-
) -> tuple[bool, list[str], str, str]:
86+
) -> Tuple[bool, List[str], str, str]:
8787
"""
8888
Perform comprehensive safety checks.
8989
@@ -271,7 +271,7 @@ def _detect_pii(self, text: str) -> dict:
271271
"types": ", ".join(pii_types) if pii_types else "none",
272272
}
273273

274-
async def _llm_safety_check(self, text: str, categories: list[str]) -> dict:
274+
async def _llm_safety_check(self, text: str, categories: List[str]) -> dict:
275275
"""
276276
Use LLM for nuanced safety evaluation.
277277

tests/conftest.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,16 @@ def mock_openai_client() -> AsyncMock:
242242
)
243243
]
244244
mock_client.chat.completions.create.return_value = mock_response
245+
246+
# Mock moderation API for safety evaluator
247+
mock_moderation_response = MagicMock()
248+
mock_moderation_result = MagicMock()
249+
mock_moderation_result.flagged = False
250+
mock_moderation_result.categories = MagicMock()
251+
mock_moderation_result.categories.model_dump.return_value = {}
252+
mock_moderation_response.results = [mock_moderation_result]
253+
mock_client.moderations.create.return_value = mock_moderation_response
254+
245255
return mock_client
246256

247257

@@ -362,12 +372,5 @@ def temp_yaml_directory(tmp_path: Path) -> Path:
362372
# Async Testing Utilities
363373
# ============================================================================
364374

365-
366-
@pytest.fixture
367-
def event_loop():
368-
"""Create an event loop for async tests."""
369-
import asyncio
370-
371-
loop = asyncio.new_event_loop()
372-
yield loop
373-
loop.close()
375+
# Note: event_loop fixture removed - pytest-asyncio with asyncio_mode=auto
376+
# handles this automatically. The custom fixture is deprecated in pytest-asyncio >= 0.23

tests/test_main_evaluator.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ async def test_evaluate_all_pass(self, sample_test_case, sample_execution_trace,
3434
"""Test complete evaluation when all criteria pass."""
3535
evaluator = Evaluator()
3636
evaluator.output_evaluator.client = mock_openai_client
37+
evaluator.hallucination_evaluator.client = mock_openai_client
38+
evaluator.safety_evaluator.client = mock_openai_client
3739

3840
result = await evaluator.evaluate(sample_test_case, sample_execution_trace)
3941

@@ -51,6 +53,8 @@ async def test_evaluate_creates_all_evaluations(
5153
"""Test that all sub-evaluators are run."""
5254
evaluator = Evaluator()
5355
evaluator.output_evaluator.client = mock_openai_client
56+
evaluator.hallucination_evaluator.client = mock_openai_client
57+
evaluator.safety_evaluator.client = mock_openai_client
5458

5559
result = await evaluator.evaluate(sample_test_case, sample_execution_trace)
5660

@@ -328,6 +332,8 @@ async def test_evaluate_with_boundary_score(self, mock_openai_client):
328332
"""Test evaluation with score exactly at threshold."""
329333
evaluator = Evaluator()
330334
evaluator.output_evaluator.client = mock_openai_client
335+
evaluator.hallucination_evaluator.client = mock_openai_client
336+
evaluator.safety_evaluator.client = mock_openai_client
331337

332338
test_case = TestCase(
333339
name="test",
@@ -368,6 +374,8 @@ async def test_evaluate_score_rounding(self, mock_openai_client):
368374
"""Test that score is properly rounded to 2 decimal places."""
369375
evaluator = Evaluator()
370376
evaluator.output_evaluator.client = mock_openai_client
377+
evaluator.hallucination_evaluator.client = mock_openai_client
378+
evaluator.safety_evaluator.client = mock_openai_client
371379

372380
test_case = TestCase(
373381
name="test",
@@ -406,6 +414,8 @@ async def test_evaluate_with_no_thresholds(self, mock_openai_client):
406414
"""Test evaluation when cost/latency thresholds are not specified."""
407415
evaluator = Evaluator()
408416
evaluator.output_evaluator.client = mock_openai_client
417+
evaluator.hallucination_evaluator.client = mock_openai_client
418+
evaluator.safety_evaluator.client = mock_openai_client
409419

410420
test_case = TestCase(
411421
name="test",

0 commit comments

Comments (0)