Commit 16808d0

Update server.py
1 parent d8e140f commit 16808d0

1 file changed: +55 -10 lines changed

src/scrapegraph_mcp/server.py

Lines changed: 55 additions & 10 deletions
@@ -5,7 +5,8 @@
 - markdownify: Convert any webpage into clean, formatted markdown
 - smartscraper: Extract structured data from any webpage using AI
 - searchscraper: Perform AI-powered web searches with structured results
-- crawl: Perform intelligent web crawling with AI-powered data extraction
+- crawl_requester: Initiate intelligent web crawling requests (step 1)
+- crawl_fetcher: Fetch results from crawling requests (step 2)
 """
 
 import os
@@ -125,7 +126,7 @@ def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scr
 
         return response.json()
 
-    def crawl(
+    def crawl_requester(
         self,
         url: str,
         prompt: str = None,
@@ -136,7 +137,7 @@ def crawl(
         markdown_only: bool = None
     ) -> Dict[str, Any]:
         """
-        Perform intelligent web crawling with AI-powered data extraction.
+        Initiate a web crawling request and get a request ID.
 
         Args:
             url: Starting URL to crawl
@@ -148,9 +149,9 @@ def crawl(
             markdown_only: Whether to return only markdown content without AI processing (optional)
 
         Returns:
-            Dictionary containing the crawl results
+            Dictionary containing the request ID and status
         """
-        endpoint = f"{self.BASE_URL}/crawl"
+        endpoint = f"{self.BASE_URL}/crawl/requester"
         data = {
             "url": url
         }
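In this hunk the only change is the endpoint path; the request body is still built from `url` plus the optional fields handled in the unchanged lines that follow. A minimal raw-HTTP sketch of what `crawl_requester` now sends, assuming `requests`, a placeholder base URL, and an assumed API-key header name (none of these are confirmed by the diff):

```python
import requests

# Hypothetical raw equivalent of the client's crawl_requester call.
# BASE_URL, the "SGAI-APIKEY" header name, and the "request_id"
# response key are illustrative assumptions only.
BASE_URL = "https://api.scrapegraphai.com/v1"

resp = requests.post(
    f"{BASE_URL}/crawl/requester",
    headers={"SGAI-APIKEY": "sgai-your-key"},
    json={"url": "https://example.com", "prompt": "Extract the page titles"},
)
resp.raise_for_status()
request_id = resp.json().get("request_id")
```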
@@ -177,6 +178,29 @@ def crawl(
 
         return response.json()
 
+    def crawl_fetcher(self, request_id: str) -> Dict[str, Any]:
+        """
+        Fetch the results of a crawling request using the request ID.
+
+        Args:
+            request_id: The request ID returned by crawl_requester
+
+        Returns:
+            Dictionary containing the crawl results or status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/fetcher"
+        data = {
+            "request_id": request_id
+        }
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
     def close(self) -> None:
         """Close the HTTP client."""
         self.client.close()
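Together, `crawl_requester` and `crawl_fetcher` form a submit-then-poll pair. A minimal usage sketch, assuming `client` is an instance of this module's client class and assuming the responses expose `request_id` and `status` keys (the diff does not show the response schema):

```python
import time

# Step 1: submit the crawl and keep the returned ID.
submitted = client.crawl_requester(
    url="https://example.com",
    prompt="Extract all product names and prices",
)
request_id = submitted["request_id"]  # key name assumed from the docstrings

# Step 2: poll until the crawl leaves a pending state; the status
# values checked here are assumptions for illustration.
result = client.crawl_fetcher(request_id)
while result.get("status") in ("pending", "processing"):
    time.sleep(5)
    result = client.crawl_fetcher(request_id)

print(result)
```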
@@ -267,9 +291,9 @@ def searchscraper(
         return {"error": str(e)}
 
 
-# Add tool for crawl (smartcrawler)
+# Add tool for crawl requester (smartcrawler step 1)
 @mcp.tool()
-def crawl(
+def crawl_requester(
     url: str,
     prompt: str = None,
     cache_website: bool = None,
@@ -279,7 +303,7 @@ def crawl(
     markdown_only: bool = None
 ) -> Dict[str, Any]:
     """
-    Perform intelligent web crawling with AI-powered data extraction.
+    Initiate a web crawling request and get a request ID.
 
     Args:
         url: Starting URL to crawl
@@ -291,13 +315,13 @@ def crawl(
         markdown_only: Whether to return only markdown content without AI processing (optional)
 
     Returns:
-        Dictionary containing the crawl results
+        Dictionary containing the request ID and status
    """
    if scrapegraph_client is None:
        return {"error": "ScapeGraph client not initialized. Please provide an API key."}

    try:
-        return scrapegraph_client.crawl(
+        return scrapegraph_client.crawl_requester(
            url=url,
            prompt=prompt,
            cache_website=cache_website,
@@ -310,6 +334,27 @@ def crawl(
         return {"error": str(e)}
 
 
+# Add tool for crawl fetcher (smartcrawler step 2)
+@mcp.tool()
+def crawl_fetcher(request_id: str) -> Dict[str, Any]:
+    """
+    Fetch the results of a crawling request using the request ID.
+
+    Args:
+        request_id: The request ID returned by crawl_requester
+
+    Returns:
+        Dictionary containing the crawl results or status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_fetcher(request_id)
+    except Exception as e:
+        return {"error": str(e)}
+
+
 def main() -> None:
     """Run the ScapeGraph MCP server."""
     print("Starting ScapeGraph MCP server!")
