Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
438a383
Improve search tool to extract resolved urls
amrit110 Feb 2, 2026
c2b09d2
Add web fetch tool
amrit110 Feb 2, 2026
67bb38d
Add html_to_markdown dependency
amrit110 Feb 2, 2026
da81995
Add html_to_markdown dependency
amrit110 Feb 2, 2026
05471dc
Refactor out the redirect url code
amrit110 Feb 2, 2026
5685b65
Remove unused synchronous version
amrit110 Feb 2, 2026
fc29b79
Fix merge conflict
amrit110 Feb 2, 2026
ce62b75
Merge branch 'main' into ak/improve_search_tool
amrit110 Feb 2, 2026
ca7d319
Merge branch 'main' into ak/improve_search_tool
amrit110 Feb 3, 2026
3e8938a
Merge branch 'ak/improve_search_tool' of github.com:VectorInstitute/e…
amrit110 Feb 3, 2026
5ada2fa
Fix merge conflicts
amrit110 Feb 3, 2026
1caea9e
Fix typing issues
amrit110 Feb 3, 2026
c6b1207
Merge branch 'main' into ak/improve_search_tool
amrit110 Feb 3, 2026
fa7a951
Update search fn to async
amrit110 Feb 4, 2026
871530f
Merge branch 'ak/improve_search_tool' of github.com:VectorInstitute/e…
amrit110 Feb 4, 2026
f6792cf
Use modern operator to denote union of types
amrit110 Feb 4, 2026
5ecb97f
[pre-commit.ci] Add auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 4, 2026
1fee32a
Use tenacity for retries
amrit110 Feb 4, 2026
11655f8
Merge branch 'ak/improve_search_tool' of github.com:VectorInstitute/e…
amrit110 Feb 4, 2026
5eebe9f
Fix config in test using mock
amrit110 Feb 4, 2026
9a73472
Improve return docstring
amrit110 Feb 4, 2026
6f93ea0
Fix test
amrit110 Feb 4, 2026
2a9f4bb
Remove use of cast, let's stop lying to the type checker
amrit110 Feb 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
TRAJECTORY_EVALUATOR_TEMPLATE,
)
from langfuse._client.datasets import DatasetItemClient
from langfuse.experiment import Evaluation
from langfuse.experiment import Evaluation, LocalExperimentItem
from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall
from openai.types.responses.response_output_message import ResponseOutputMessage
from openai.types.responses.response_output_refusal import ResponseOutputRefusal
Expand Down Expand Up @@ -136,12 +136,12 @@ def __init__(
self.reports_output_path = reports_output_path
self.langfuse_project_name = langfuse_project_name

async def run(self, *, item: DatasetItemClient, **kwargs) -> EvaluationOutput:
async def run(self, *, item: LocalExperimentItem | DatasetItemClient, **kwargs: dict[str, Any]) -> EvaluationOutput:
"""Run the report generation agent against an item from a Langfuse dataset.

Parameters
----------
item : DatasetItemClient
item : LocalExperimentItem | DatasetItemClient
The item from the Langfuse dataset to evaluate against.

Returns
Expand All @@ -157,7 +157,9 @@ async def run(self, *, item: DatasetItemClient, **kwargs) -> EvaluationOutput:
reports_output_path=self.reports_output_path,
langfuse_project_name=self.langfuse_project_name,
)
result = await run_agent_with_retry(report_generation_agent, item.input)
# Handle both TypedDict and class access patterns
item_input = item["input"] if isinstance(item, dict) else item.input
result = await run_agent_with_retry(report_generation_agent, item_input)

# Extract the report data and trajectory from the agent's response
actions = []
Expand Down
9 changes: 8 additions & 1 deletion aieng-eval-agents/aieng/agent_evals/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,21 @@
- SQL Database access (sql_database.py)
"""

from .search import GroundedResponse, GroundingChunk, create_google_search_tool, format_response_with_citations
from .search import (
GroundedResponse,
GroundingChunk,
create_google_search_tool,
format_response_with_citations,
google_search,
)
from .sql_database import ReadOnlySqlDatabase, ReadOnlySqlPolicy


__all__ = [
# Search tools
"create_google_search_tool",
"format_response_with_citations",
"google_search",
"GroundedResponse",
"GroundingChunk",
# SQL Database tools
Expand Down
183 changes: 183 additions & 0 deletions aieng-eval-agents/aieng/agent_evals/tools/_redirect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""URL redirect resolution utilities.

Provides utilities for resolving redirect URLs (especially Vertex AI grounding
redirects) to their final destinations. Used by search and web fetch tools to
display actual URLs.
"""

import asyncio
import logging

import httpx
from tenacity import AsyncRetrying, retry_if_exception_type, stop_after_attempt, wait_exponential


# Module-level logger, named after this module per stdlib convention.
logger = logging.getLogger(__name__)

# Substrings that identify known redirect URLs (Vertex AI grounding
# redirects). Matching is a plain substring test — see _is_redirect_url.
REDIRECT_URL_PATTERNS = (
    "vertexaisearch.cloud.google.com/grounding-api-redirect",
    "vertexaisearch.cloud.google.com/redirect",
)

# Timeouts (seconds) used when following redirects.
_REDIRECT_CONNECT_TIMEOUT = 10.0
_REDIRECT_READ_TIMEOUT = 15.0
# Browser-like User-Agent; some redirect endpoints reject unknown clients.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
# Process-wide cache of resolved redirects (original URL -> final URL).
# Failures are cached as the original URL to avoid repeated attempts.
# NOTE(review): unbounded and not thread-safe — acceptable for a single
# event loop, but confirm if this module is ever shared across threads.
_redirect_cache: dict[str, str] = {}


def _is_redirect_url(url: str) -> bool:
    """Return True if *url* matches a known redirect pattern."""
    for pattern in REDIRECT_URL_PATTERNS:
        if pattern in url:
            return True
    return False


def _get_redirect_timeout() -> httpx.Timeout:
    """Build the httpx timeout configuration used for redirect resolution."""
    # Connect/read use the module-level tunables; write/pool stay at 10s.
    return httpx.Timeout(
        connect=_REDIRECT_CONNECT_TIMEOUT,
        write=10.0,
        read=_REDIRECT_READ_TIMEOUT,
        pool=10.0,
    )


async def _resolve_with_head_async(client: httpx.AsyncClient, url: str) -> str | None:
    """Attempt redirect resolution via an async HEAD request.

    Returns the final URL on success, or ``None`` to signal that the
    caller should fall back to a GET request.
    """
    try:
        response = await client.head(url, headers={"User-Agent": _USER_AGENT})
    except httpx.HTTPStatusError as err:
        # 405 / 501: server does not support HEAD — fall back to GET.
        if err.response.status_code in (405, 501):
            return None
        raise
    except Exception:
        # Any other failure also signals the GET fallback.
        return None
    return str(response.url)


async def _resolve_with_get_async(client: httpx.AsyncClient, url: str) -> str:
    """Resolve a redirect via an async GET (used when HEAD is unsupported)."""
    request_headers = {"User-Agent": _USER_AGENT}
    # Stream the response so the body is never actually downloaded.
    async with client.stream("GET", url, headers=request_headers) as response:
        return str(response.url)


async def _resolve_single_url_async(
    client: httpx.AsyncClient,
    url: str,
    max_retries: int = 3,
    base_delay: float = 1.0,
) -> str:
    """Resolve a single URL with retries and exponential backoff.

    Uses tenacity for automatic retry handling with exponential backoff.
    Results (including failures, which resolve to the original URL) are
    stored in the module-level ``_redirect_cache``.

    Parameters
    ----------
    client : httpx.AsyncClient
        The HTTP client to use.
    url : str
        The URL to resolve.
    max_retries : int
        Maximum number of attempts (``stop_after_attempt`` counts total
        attempts, so 3 means one initial try plus two retries).
    base_delay : float
        Base delay between retries; ``wait_exponential`` grows it
        exponentially up to a 60-second cap.

    Returns
    -------
    str
        The resolved URL, or original URL if not a redirect or on failure.
    """
    # Skip resolution for non-redirect URLs
    if not _is_redirect_url(url):
        return url

    # Check cache first
    if url in _redirect_cache:
        return _redirect_cache[url]

    try:
        # Retry only on transient transport errors; anything else
        # propagates straight to the except below.
        async for attempt in AsyncRetrying(
            stop=stop_after_attempt(max_retries),
            wait=wait_exponential(multiplier=base_delay, min=base_delay, max=60.0),
            retry=retry_if_exception_type((httpx.TimeoutException, httpx.ConnectError, httpx.ReadError)),
        ):
            with attempt:
                # Try HEAD first (faster, no body download)
                final_url = await _resolve_with_head_async(client, url)

                # Fall back to GET if HEAD failed
                # NOTE(review): the HEAD helper swallows transport errors
                # and returns None, so retries here are effectively driven
                # by the GET fallback — confirm that is intended.
                if final_url is None:
                    logger.debug(f"HEAD failed for {url[:60]}..., trying GET")
                    final_url = await _resolve_with_get_async(client, url)

                if final_url != url:
                    logger.debug(f"Resolved redirect: {url[:60]}... -> {final_url[:60]}...")

                _redirect_cache[url] = final_url

        # If we reach here, the retry loop succeeded
        return _redirect_cache[url]

    except Exception as e:
        # All retries exhausted or non-retryable error
        logger.warning(f"Failed to resolve redirect URL {url[:60]}...: {type(e).__name__}: {e}")
        _redirect_cache[url] = url  # Cache failures to avoid repeated attempts
        return url


async def resolve_redirect_url_async(url: str) -> str:
    """Resolve a (possibly redirecting) URL to its final destination.

    Non-redirect URLs and previously-resolved URLs are returned without
    any network I/O.

    Parameters
    ----------
    url : str
        The URL to resolve (may be a redirect URL).

    Returns
    -------
    str
        The final destination URL after following redirects.
    """
    # Fast paths: not a redirect, or already resolved.
    if not _is_redirect_url(url):
        return url
    cached = _redirect_cache.get(url)
    if cached is not None:
        return cached

    timeout = _get_redirect_timeout()
    async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client:
        return await _resolve_single_url_async(client, url)


async def resolve_redirect_urls_async(urls: list[str]) -> list[str]:
    """Resolve multiple redirect URLs concurrently.

    Each URL is resolved independently; per-URL failures resolve to the
    original URL rather than raising.

    Parameters
    ----------
    urls : list[str]
        List of URLs to resolve.

    Returns
    -------
    list[str]
        List of resolved URLs in the same order.
    """
    if not urls:
        return []

    connection_limits = httpx.Limits(max_connections=20, max_keepalive_connections=10)
    async with httpx.AsyncClient(
        timeout=_get_redirect_timeout(),
        follow_redirects=True,
        limits=connection_limits,
    ) as client:
        # Fan out one resolution task per URL; gather preserves order.
        resolved = await asyncio.gather(
            *(_resolve_single_url_async(client, u) for u in urls)
        )
    return list(resolved)
Loading