 - markdownify: Convert any webpage into clean, formatted markdown
 - smartscraper: Extract structured data from any webpage using AI
 - searchscraper: Perform AI-powered web searches with structured results
-- crawl: Perform intelligent web crawling with AI-powered data extraction
+- crawl_requester: Initiate intelligent web crawling requests (step 1)
+- crawl_fetcher: Fetch results from crawling requests (step 2)
 
 """
 
 import os
@@ -125,7 +126,7 @@ def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scr
 
         return response.json()
 
-    def crawl(
+    def crawl_requester(
         self,
         url: str,
         prompt: str = None,
@@ -136,7 +137,7 @@ def crawl(
         markdown_only: bool = None
     ) -> Dict[str, Any]:
         """
-        Perform intelligent web crawling with AI-powered data extraction.
+        Initiate a web crawling request and get a request ID.
 
         Args:
             url: Starting URL to crawl
@@ -148,9 +149,9 @@ def crawl(
             markdown_only: Whether to return only markdown content without AI processing (optional)
 
         Returns:
-            Dictionary containing the crawl results
+            Dictionary containing the request ID and status
         """
-        endpoint = f"{self.BASE_URL}/crawl"
+        endpoint = f"{self.BASE_URL}/crawl/requester"
         data = {
             "url": url
         }
@@ -177,6 +178,29 @@ def crawl(
 
         return response.json()
 
+    def crawl_fetcher(self, request_id: str) -> Dict[str, Any]:
+        """
+        Fetch the results of a crawling request using the request ID.
+
+        Args:
+            request_id: The request ID returned by crawl_requester
+
+        Returns:
+            Dictionary containing the crawl results or status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/fetcher"
+        data = {
+            "request_id": request_id
+        }
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
     def close(self) -> None:
         """Close the HTTP client."""
         self.client.close()
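Together, these two methods replace the synchronous `crawl` call with a submit-then-poll pair, which keeps long crawls from blocking a single HTTP request. A minimal sketch of how a caller might drive the client directly is below; the client class name (`ScapeGraphClient`), the `request_id` response key, and the `"processing"` status value are assumptions inferred from the payloads in this diff, not guaranteed by the API.

```python
import time

# Assumed client class name; the diff only shows its methods.
client = ScapeGraphClient(api_key="your-api-key")

# Step 1: submit the crawl and keep the returned request ID.
submitted = client.crawl_requester(
    url="https://example.com",
    prompt="Extract all product names and prices",
)
request_id = submitted["request_id"]  # key assumed from the fetcher's request payload

# Step 2: poll the fetcher until the crawl leaves the in-progress state.
while True:
    result = client.crawl_fetcher(request_id)
    if result.get("status") != "processing":  # status value is an assumption
        break
    time.sleep(5)

client.close()
```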
@@ -267,9 +291,9 @@ def searchscraper(
         return {"error": str(e)}
 
 
-# Add tool for crawl (smartcrawler)
+# Add tool for crawl requester (smartcrawler step 1)
 @mcp.tool()
-def crawl(
+def crawl_requester(
     url: str,
     prompt: str = None,
     cache_website: bool = None,
@@ -279,7 +303,7 @@ def crawl(
     markdown_only: bool = None
 ) -> Dict[str, Any]:
     """
-    Perform intelligent web crawling with AI-powered data extraction.
+    Initiate a web crawling request and get a request ID.
 
     Args:
         url: Starting URL to crawl
@@ -291,13 +315,13 @@ def crawl(
         markdown_only: Whether to return only markdown content without AI processing (optional)
 
     Returns:
-        Dictionary containing the crawl results
+        Dictionary containing the request ID and status
     """
     if scrapegraph_client is None:
         return {"error": "ScapeGraph client not initialized. Please provide an API key."}
 
     try:
-        return scrapegraph_client.crawl(
+        return scrapegraph_client.crawl_requester(
             url=url,
             prompt=prompt,
             cache_website=cache_website,
@@ -310,6 +334,27 @@ def crawl(
         return {"error": str(e)}
 
 
+# Add tool for crawl fetcher (smartcrawler step 2)
+@mcp.tool()
+def crawl_fetcher(request_id: str) -> Dict[str, Any]:
+    """
+    Fetch the results of a crawling request using the request ID.
+
+    Args:
+        request_id: The request ID returned by crawl_requester
+
+    Returns:
+        Dictionary containing the crawl results or status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_fetcher(request_id)
+    except Exception as e:
+        return {"error": str(e)}
+
+
 def main() -> None:
     """Run the ScapeGraph MCP server."""
     print("Starting ScapeGraph MCP server!")
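At the tool layer the same two-step flow applies, with one difference worth noting: the MCP wrappers return an `{"error": ...}` dictionary instead of raising, so callers must check for that key. A hedged sketch, calling the wrapped functions directly for illustration (a real MCP client would invoke the tools over the protocol, and the `request_id` key and `"processing"` status remain assumptions):

```python
import time

# Step 1: submit the crawl via the requester tool and check the error convention.
submitted = crawl_requester(
    url="https://example.com",
    prompt="List every article title",
)
if "error" in submitted:
    raise SystemExit(submitted["error"])

# Step 2: poll the fetcher tool until the crawl completes.
request_id = submitted["request_id"]  # assumed response key
result = crawl_fetcher(request_id)
while result.get("status") == "processing":  # assumed status value
    time.sleep(5)
    result = crawl_fetcher(request_id)
print(result)
```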