- markdownify: Convert any webpage into clean, formatted markdown
- smartscraper: Extract structured data from any webpage using AI
- searchscraper: Perform AI-powered web searches with structured results
+ - crawl_requester: Initiate intelligent web crawling requests (step 1)
+ - crawl_fetcher: Fetch results from crawling requests (step 2)
"""

import os
@@ -56,22 +58,32 @@ def markdownify(self, website_url: str) -> Dict[str, Any]:
        return response.json()

-    def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
+    def smartscraper(self, user_prompt: str, website_url: str, number_of_scrolls: int = None, markdown_only: bool = None) -> Dict[str, Any]:
        """
        Extract structured data from a webpage using AI.

        Args:
            user_prompt: Instructions for what data to extract
            website_url: URL of the webpage to scrape
+            number_of_scrolls: Number of infinite scrolls to perform (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)

        Returns:
-            Dictionary containing the extracted data
+            Dictionary containing the extracted data or markdown content
        """
        url = f"{self.BASE_URL}/smartscraper"
        data = {
            "user_prompt": user_prompt,
            "website_url": website_url
        }
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls
+
+        # Add markdown_only to the request if provided
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only

        response = self.client.post(url, headers=self.headers, json=data)
@@ -81,12 +93,14 @@ def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
        return response.json()

-    def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
+    def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scrolls: int = None) -> Dict[str, Any]:
        """
        Perform AI-powered web searches with structured results.

        Args:
            user_prompt: Search query or instructions
+            num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+            number_of_scrolls: Number of infinite scrolls to perform on each website (optional)

        Returns:
            Dictionary containing search results and reference URLs
@@ -95,6 +109,14 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
        data = {
            "user_prompt": user_prompt
        }
+
+        # Add num_results to the request if provided
+        if num_results is not None:
+            data["num_results"] = num_results
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls

        response = self.client.post(url, headers=self.headers, json=data)
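Taken together, the hunks above thread the new optional parameters (number_of_scrolls, markdown_only, num_results) into the request payload only when the caller supplies them. A minimal usage sketch of the updated client methods follows; the import path, constructor, key, and URLs are assumptions for illustration, not part of this commit:

from scrapegraph_mcp.server import ScapeGraphClient  # assumed import path

client = ScapeGraphClient(api_key="sgai-your-key")  # placeholder key

# Scroll the target page five times before AI extraction.
products = client.smartscraper(
    user_prompt="List all product names and prices",
    website_url="https://example.com/shop",
    number_of_scrolls=5,
)

# Search across five websites instead of the default three (3 websites = 30 credits).
news = client.searchscraper(
    user_prompt="latest stable Python release",
    num_results=5,
)

client.close()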
@@ -104,6 +126,81 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
        return response.json()

+    def crawl_requester(
+        self,
+        url: str,
+        prompt: str = None,
+        cache_website: bool = None,
+        depth: int = None,
+        max_pages: int = None,
+        same_domain_only: bool = None,
+        markdown_only: bool = None
+    ) -> Dict[str, Any]:
+        """
+        Initiate a web crawling request and get a request ID.
+
+        Args:
+            url: Starting URL to crawl
+            prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+            cache_website: Whether to cache the website content (optional)
+            depth: Maximum crawling depth (optional)
+            max_pages: Maximum number of pages to crawl (optional)
+            same_domain_only: Whether to crawl only within the same domain (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)
+
+        Returns:
+            Dictionary containing the request ID and status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/requester"
+        data = {
+            "url": url
+        }
+
+        # Add optional parameters if provided
+        if prompt is not None:
+            data["prompt"] = prompt
+        if cache_website is not None:
+            data["cache_website"] = cache_website
+        if depth is not None:
+            data["depth"] = depth
+        if max_pages is not None:
+            data["max_pages"] = max_pages
+        if same_domain_only is not None:
+            data["same_domain_only"] = same_domain_only
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
+    def crawl_fetcher(self, request_id: str) -> Dict[str, Any]:
+        """
+        Fetch the results of a crawling request using the request ID.
+
+        Args:
+            request_id: The request ID returned by crawl_requester
+
+        Returns:
+            Dictionary containing the crawl results or status
+        """
+        endpoint = f"{self.BASE_URL}/crawl/fetcher"
+        data = {
+            "request_id": request_id
+        }
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
    def close(self) -> None:
        """Close the HTTP client."""
        self.client.close()
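The two new methods above split crawling into an asynchronous, two-step exchange: crawl_requester submits the job and returns immediately with a request ID, and crawl_fetcher is called later (typically in a polling loop) to retrieve the results. A rough polling helper, assuming the responses expose request_id and status fields as the docstrings suggest (neither key nor the status values are confirmed by this commit):

import time

def crawl_and_wait(client, start_url: str, poll_seconds: float = 5.0, timeout: float = 300.0):
    """Submit a crawl via crawl_requester and poll crawl_fetcher until it finishes."""
    submitted = client.crawl_requester(
        url=start_url,
        prompt="Collect page titles and one-line summaries",
        depth=2,
        max_pages=10,
        same_domain_only=True,
    )
    request_id = submitted["request_id"]  # assumed response key

    deadline = time.time() + timeout
    while time.time() < deadline:
        result = client.crawl_fetcher(request_id)
        # Assumed convention: anything other than a pending/processing status is final.
        if result.get("status") not in ("pending", "processing"):
            return result
        time.sleep(poll_seconds)
    raise TimeoutError(f"Crawl {request_id} did not finish within {timeout} seconds")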
@@ -142,37 +239,45 @@ def markdownify(website_url: str) -> Dict[str, Any]:
@mcp.tool()
def smartscraper(
    user_prompt: str,
-    website_url: str
+    website_url: str,
+    number_of_scrolls: int = None,
+    markdown_only: bool = None
) -> Dict[str, Any]:
    """
    Extract structured data from a webpage using AI.

    Args:
        user_prompt: Instructions for what data to extract
        website_url: URL of the webpage to scrape
+        number_of_scrolls: Number of infinite scrolls to perform (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)

    Returns:
-        Dictionary containing the extracted data
+        Dictionary containing the extracted data or markdown content
    """
    if scrapegraph_client is None:
        return {"error": "ScapeGraph client not initialized. Please provide an API key."}

    try:
-        return scrapegraph_client.smartscraper(user_prompt, website_url)
+        return scrapegraph_client.smartscraper(user_prompt, website_url, number_of_scrolls, markdown_only)
    except Exception as e:
        return {"error": str(e)}


# Add tool for searchscraper
@mcp.tool()
def searchscraper(
-    user_prompt: str
+    user_prompt: str,
+    num_results: int = None,
+    number_of_scrolls: int = None
) -> Dict[str, Any]:
    """
    Perform AI-powered web searches with structured results.

    Args:
        user_prompt: Search query or instructions
+        num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+        number_of_scrolls: Number of infinite scrolls to perform on each website (optional)

    Returns:
        Dictionary containing search results and reference URLs
@@ -181,7 +286,71 @@ def searchscraper(
        return {"error": "ScapeGraph client not initialized. Please provide an API key."}

    try:
-        return scrapegraph_client.searchscraper(user_prompt)
+        return scrapegraph_client.searchscraper(user_prompt, num_results, number_of_scrolls)
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# Add tool for crawl requester (smartcrawler step 1)
+@mcp.tool()
+def crawl_requester(
+    url: str,
+    prompt: str = None,
+    cache_website: bool = None,
+    depth: int = None,
+    max_pages: int = None,
+    same_domain_only: bool = None,
+    markdown_only: bool = None
+) -> Dict[str, Any]:
+    """
+    Initiate a web crawling request and get a request ID.
+
+    Args:
+        url: Starting URL to crawl
+        prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+        cache_website: Whether to cache the website content (optional)
+        depth: Maximum crawling depth (optional)
+        max_pages: Maximum number of pages to crawl (optional)
+        same_domain_only: Whether to crawl only within the same domain (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)
+
+    Returns:
+        Dictionary containing the request ID and status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_requester(
+            url=url,
+            prompt=prompt,
+            cache_website=cache_website,
+            depth=depth,
+            max_pages=max_pages,
+            same_domain_only=same_domain_only,
+            markdown_only=markdown_only
+        )
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# Add tool for crawl fetcher (smartcrawler step 2)
+@mcp.tool()
+def crawl_fetcher(request_id: str) -> Dict[str, Any]:
+    """
+    Fetch the results of a crawling request using the request ID.
+
+    Args:
+        request_id: The request ID returned by crawl_requester
+
+    Returns:
+        Dictionary containing the crawl results or status
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl_fetcher(request_id)
    except Exception as e:
        return {"error": str(e)}