From ad10a2c198012d61d6a1b07e693401769688c465 Mon Sep 17 00:00:00 2001 From: Charlie C Date: Mon, 25 Aug 2025 13:44:08 +0800 Subject: [PATCH] feat(crawler): add redirected_status_code to crawl results and responses --- crawl4ai/async_crawler_strategy.py | 2 ++ crawl4ai/async_webcrawler.py | 1 + crawl4ai/models.py | 2 ++ 3 files changed, 5 insertions(+) diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index 943867d0..723bd5d8 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -682,6 +682,7 @@ async def handle_request_failed_capture(request): url, wait_until=config.wait_until, timeout=config.page_timeout ) redirected_url = page.url + redirected_status_code = response.status except Error as e: # Allow navigation to be aborted when downloading files # This is expected behavior for downloads in some browser engines @@ -1038,6 +1039,7 @@ async def get_delayed_content(delay: float = 5.0) -> str: self._downloaded_files if self._downloaded_files else None ), redirected_url=redirected_url, + redirected_status_code=redirected_status_code, # Include captured data if enabled network_requests=captured_requests if config.capture_network_requests else None, console_messages=captured_console if config.capture_console_messages else None, diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index ebd2859d..e03fb601 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -369,6 +369,7 @@ async def arun( crawl_result.status_code = async_response.status_code crawl_result.redirected_url = async_response.redirected_url or url + crawl_result.redirected_status_code = async_response.redirected_status_code crawl_result.response_headers = async_response.response_headers crawl_result.downloaded_files = async_response.downloaded_files crawl_result.js_execution_result = js_execution_result diff --git a/crawl4ai/models.py b/crawl4ai/models.py index 640c2f2d..af62def5 100644 --- a/crawl4ai/models.py +++ b/crawl4ai/models.py @@ -149,6 +149,7 @@ class CrawlResult(BaseModel): ssl_certificate: Optional[SSLCertificate] = None dispatch_result: Optional[DispatchResult] = None redirected_url: Optional[str] = None + redirected_status_code: Optional[int] = None network_requests: Optional[List[Dict[str, Any]]] = None console_messages: Optional[List[Dict[str, Any]]] = None tables: List[Dict] = Field(default_factory=list) # NEW – [{headers,rows,caption,summary}] @@ -319,6 +320,7 @@ class AsyncCrawlResponse(BaseModel): downloaded_files: Optional[List[str]] = None ssl_certificate: Optional[SSLCertificate] = None redirected_url: Optional[str] = None + redirected_status_code: Optional[int] = None network_requests: Optional[List[Dict[str, Any]]] = None console_messages: Optional[List[Dict[str, Any]]] = None