When using EvalView, please follow these security best practices:

- **Review dependencies**: Use tools like `pip-audit` to check for known vulnerabilities
- **Lock versions**: Use `requirements.txt` or `poetry.lock` to pin dependency versions

## Built-in Security Features

### SSRF (Server-Side Request Forgery) Protection

EvalView includes built-in protection against SSRF attacks. By default in production mode, requests to the following destinations are blocked:

- **Private IP ranges**: 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16
- **Loopback addresses**: localhost, 127.0.0.0/8
- **Cloud metadata endpoints**: 169.254.169.254 (AWS, GCP, Azure)
- **Link-local addresses**: 169.254.0.0/16
- **Internal hostnames**: kubernetes.default, metadata.google.internal

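A guard with this behavior can be sketched using only Python's standard library. This is an illustrative sketch, not EvalView's actual implementation: the function name, the hostname blocklist, and the resolution strategy are assumptions mirroring the list above.

```python
import ipaddress
import socket
from urllib.parse import urlparse

# Hostnames blocked outright, before any DNS resolution
# (hypothetical list mirroring the documentation above).
BLOCKED_HOSTNAMES = {"localhost", "kubernetes.default", "metadata.google.internal"}


def is_url_allowed(url: str, allow_private_urls: bool = False) -> bool:
    """Return False if the URL targets a private or internal destination."""
    if allow_private_urls:
        return True  # local-development mode: no restrictions
    host = urlparse(url).hostname
    if host is None or host.lower() in BLOCKED_HOSTNAMES:
        return False
    try:
        # Literal IPs are checked directly; hostnames are resolved first.
        addr = ipaddress.ip_address(host)
    except ValueError:
        try:
            addr = ipaddress.ip_address(socket.gethostbyname(host))
        except OSError:
            return False  # unresolvable hosts are rejected
    # is_private covers 10/8, 172.16/12, and 192.168/16; loopback covers
    # 127/8; link-local covers 169.254/16, including 169.254.169.254.
    return not (addr.is_private or addr.is_loopback or addr.is_link_local)
```

A real implementation must also defend against DNS rebinding (the address can change between this check and the actual request) and validate every hop of an HTTP redirect chain.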
#### Configuration

For local development, SSRF protection allows private URLs by default. To enable strict mode in production:

```yaml
# .evalview/config.yaml
allow_private_urls: false  # Block private/internal networks (recommended for production)
```

#### Security Considerations

- When running EvalView in production environments, set `allow_private_urls: false`
- Be cautious when loading test cases from untrusted sources: they can specify arbitrary endpoints
- Review test case YAML files before running them in sensitive environments

### LLM Prompt Injection Mitigation

The LLM-as-judge feature includes protections against prompt injection attacks:

1. **Output Sanitization**: Agent outputs are sanitized before being sent to the LLM judge
   - Long outputs are truncated (default: 10,000 chars) to prevent token exhaustion
   - Control characters are removed
   - Common prompt delimiters are escaped (`` ``` ``, `###`, `---`, XML tags, etc.)

2. **Boundary Markers**: Untrusted content is wrapped in unique cryptographic boundary markers

3. **Security Instructions**: The judge prompt explicitly instructs the LLM to:
   - Ignore any instructions within the agent output
   - Only evaluate content quality, not meta-instructions
   - Not follow commands embedded in the evaluated content
#### Limitations

While these mitigations reduce risk, they cannot completely prevent sophisticated prompt injection attacks. Consider:

- Agent outputs could still influence LLM evaluation through subtle manipulation
- Very long outputs may be truncated, potentially hiding issues
- New prompt injection techniques may bypass current protections

For high-stakes evaluations, consider:

- Manual review of agent outputs
- Multiple evaluation models
- Structured evaluation criteria that are harder to manipulate

## Known Security Considerations

### LLM-as-Judge Evaluation

- EvalView uses OpenAI's API for output quality evaluation
- Test outputs and expected outputs are sent to OpenAI for comparison
- Agent outputs are sanitized to mitigate prompt injection, but no protection is 100% effective
- **Recommendation**: Don't include sensitive/proprietary data in test cases if using LLM-as-judge

### HTTP Adapters

- Custom HTTP adapters may expose your agent endpoints
- SSRF protection is enabled by default but can be bypassed with `allow_private_urls: true`
- **Recommendation**: Use authentication, HTTPS, and rate limiting on agent endpoints

### Trace Data

- Execution traces may contain sensitive information from agent responses
- **Recommendation**: Sanitize traces before sharing or storing long-term

### Verbose Mode

The `--verbose` flag may expose sensitive information in logs:

- API request/response payloads
- Query content and agent outputs
- **Recommendation**: Avoid using verbose mode in production or when processing sensitive data

## Security Updates

We will disclose security vulnerabilities through: