 - markdownify: Convert any webpage into clean, formatted markdown
 - smartscraper: Extract structured data from any webpage using AI
 - searchscraper: Perform AI-powered web searches with structured results
+- crawl: Perform intelligent web crawling with AI-powered data extraction
 """

 import os
@@ -56,22 +57,32 @@ def markdownify(self, website_url: str) -> Dict[str, Any]:

         return response.json()

-    def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:
+    def smartscraper(self, user_prompt: str, website_url: str, number_of_scrolls: int = None, markdown_only: bool = None) -> Dict[str, Any]:
         """
         Extract structured data from a webpage using AI.

         Args:
             user_prompt: Instructions for what data to extract
             website_url: URL of the webpage to scrape
+            number_of_scrolls: Number of infinite scrolls to perform (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)

         Returns:
-            Dictionary containing the extracted data
+            Dictionary containing the extracted data or markdown content
         """
         url = f"{self.BASE_URL}/smartscraper"
         data = {
             "user_prompt": user_prompt,
             "website_url": website_url
         }
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls
+
+        # Add markdown_only to the request if provided
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only

         response = self.client.post(url, headers=self.headers, json=data)

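For orientation, here is a minimal usage sketch of the extended method. The class name `ScrapeGraphClient` and its `api_key` constructor argument are assumptions inferred from the surrounding code, not shown in this diff:

```python
# A minimal sketch, assuming the client class is named ScrapeGraphClient
# and accepts the API key in its constructor (both inferred, hypothetical).
client = ScrapeGraphClient(api_key="sgai-your-key")

# Scroll the page three times before extraction, e.g. for infinite-scroll feeds.
products = client.smartscraper(
    user_prompt="List each product name and its price",
    website_url="https://example.com/products",
    number_of_scrolls=3,
)

# Fetch the page as plain markdown with no AI processing; the empty
# user_prompt is a guess at how a markdown-only call would look, since
# the parameter remains required.
markdown = client.smartscraper(
    user_prompt="",
    website_url="https://example.com/products",
    markdown_only=True,
)
```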
@@ -81,12 +92,14 @@ def smartscraper(self, user_prompt: str, website_url: str) -> Dict[str, Any]:

         return response.json()

-    def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
+    def searchscraper(self, user_prompt: str, num_results: int = None, number_of_scrolls: int = None) -> Dict[str, Any]:
         """
         Perform AI-powered web searches with structured results.

         Args:
             user_prompt: Search query or instructions
+            num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+            number_of_scrolls: Number of infinite scrolls to perform on each website (optional)

         Returns:
             Dictionary containing search results and reference URLs
@@ -95,6 +108,14 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:
         data = {
             "user_prompt": user_prompt
         }
+
+        # Add num_results to the request if provided
+        if num_results is not None:
+            data["num_results"] = num_results
+
+        # Add number_of_scrolls to the request if provided
+        if number_of_scrolls is not None:
+            data["number_of_scrolls"] = number_of_scrolls

         response = self.client.post(url, headers=self.headers, json=data)

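The search variant follows the same pattern; here is a hedged sketch of a call that widens the search (per the docstring, the default of 3 websites costs 30 credits, so more results cost proportionally more):

```python
# Sketch only, reusing the assumed client instance from above.
results = client.searchscraper(
    user_prompt="Recent open-source LLM releases and their context windows",
    num_results=5,          # default is 3 websites (= 30 credits)
    number_of_scrolls=2,    # scrolls applied on each website visited
)

# "reference_urls" is an assumed key name, based only on the docstring's
# mention of reference URLs in the returned dictionary.
print(results.get("reference_urls"))
```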
@@ -104,6 +125,58 @@ def searchscraper(self, user_prompt: str) -> Dict[str, Any]:

         return response.json()

+    def crawl(
+        self,
+        url: str,
+        prompt: str = None,
+        cache_website: bool = None,
+        depth: int = None,
+        max_pages: int = None,
+        same_domain_only: bool = None,
+        markdown_only: bool = None
+    ) -> Dict[str, Any]:
+        """
+        Perform intelligent web crawling with AI-powered data extraction.
+
+        Args:
+            url: Starting URL to crawl
+            prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+            cache_website: Whether to cache the website content (optional)
+            depth: Maximum crawling depth (optional)
+            max_pages: Maximum number of pages to crawl (optional)
+            same_domain_only: Whether to crawl only within the same domain (optional)
+            markdown_only: Whether to return only markdown content without AI processing (optional)
+
+        Returns:
+            Dictionary containing the crawl results
+        """
+        endpoint = f"{self.BASE_URL}/crawl"
+        data = {
+            "url": url
+        }
+
+        # Add optional parameters if provided
+        if prompt is not None:
+            data["prompt"] = prompt
+        if cache_website is not None:
+            data["cache_website"] = cache_website
+        if depth is not None:
+            data["depth"] = depth
+        if max_pages is not None:
+            data["max_pages"] = max_pages
+        if same_domain_only is not None:
+            data["same_domain_only"] = same_domain_only
+        if markdown_only is not None:
+            data["markdown_only"] = markdown_only
+
+        response = self.client.post(endpoint, headers=self.headers, json=data)
+
+        if response.status_code != 200:
+            error_msg = f"Error {response.status_code}: {response.text}"
+            raise Exception(error_msg)
+
+        return response.json()
+
     def close(self) -> None:
         """Close the HTTP client."""
         self.client.close()
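Two hedged calls against the new method, again using the assumed client instance from the earlier sketches: one AI-extraction crawl and one markdown-only crawl:

```python
# AI extraction over a small, same-domain crawl.
extracted = client.crawl(
    url="https://example.com/docs",
    prompt="Collect every API endpoint and a one-line description",
    depth=2,                 # follow links at most two levels deep
    max_pages=10,            # hard cap on pages visited
    same_domain_only=True,   # never leave example.com
)

# Markdown-only crawl: per the docstring, omitting prompt (or passing
# markdown_only=True) returns page markdown without AI processing.
pages = client.crawl(
    url="https://example.com/docs",
    markdown_only=True,
    max_pages=5,
)
```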
@@ -142,37 +215,45 @@ def markdownify(website_url: str) -> Dict[str, Any]:
 @mcp.tool()
 def smartscraper(
     user_prompt: str,
-    website_url: str
+    website_url: str,
+    number_of_scrolls: int = None,
+    markdown_only: bool = None
 ) -> Dict[str, Any]:
     """
     Extract structured data from a webpage using AI.

     Args:
         user_prompt: Instructions for what data to extract
         website_url: URL of the webpage to scrape
+        number_of_scrolls: Number of infinite scrolls to perform (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)

     Returns:
-        Dictionary containing the extracted data
+        Dictionary containing the extracted data or markdown content
     """
     if scrapegraph_client is None:
         return {"error": "ScrapeGraph client not initialized. Please provide an API key."}

     try:
-        return scrapegraph_client.smartscraper(user_prompt, website_url)
+        return scrapegraph_client.smartscraper(user_prompt, website_url, number_of_scrolls, markdown_only)
     except Exception as e:
         return {"error": str(e)}


 # Add tool for searchscraper
 @mcp.tool()
 def searchscraper(
-    user_prompt: str
+    user_prompt: str,
+    num_results: int = None,
+    number_of_scrolls: int = None
 ) -> Dict[str, Any]:
     """
     Perform AI-powered web searches with structured results.

     Args:
         user_prompt: Search query or instructions
+        num_results: Number of websites to search (optional, default: 3 websites = 30 credits)
+        number_of_scrolls: Number of infinite scrolls to perform on each website (optional)

     Returns:
         Dictionary containing search results and reference URLs
@@ -181,7 +262,50 @@ def searchscraper(
         return {"error": "ScrapeGraph client not initialized. Please provide an API key."}

     try:
-        return scrapegraph_client.searchscraper(user_prompt)
+        return scrapegraph_client.searchscraper(user_prompt, num_results, number_of_scrolls)
+    except Exception as e:
+        return {"error": str(e)}
+
+
+# Add tool for crawl (smartcrawler)
+@mcp.tool()
+def crawl(
+    url: str,
+    prompt: str = None,
+    cache_website: bool = None,
+    depth: int = None,
+    max_pages: int = None,
+    same_domain_only: bool = None,
+    markdown_only: bool = None
+) -> Dict[str, Any]:
+    """
+    Perform intelligent web crawling with AI-powered data extraction.
+
+    Args:
+        url: Starting URL to crawl
+        prompt: AI prompt for data extraction (optional, if not provided returns markdown only)
+        cache_website: Whether to cache the website content (optional)
+        depth: Maximum crawling depth (optional)
+        max_pages: Maximum number of pages to crawl (optional)
+        same_domain_only: Whether to crawl only within the same domain (optional)
+        markdown_only: Whether to return only markdown content without AI processing (optional)
+
+    Returns:
+        Dictionary containing the crawl results
+    """
+    if scrapegraph_client is None:
+        return {"error": "ScrapeGraph client not initialized. Please provide an API key."}
+
+    try:
+        return scrapegraph_client.crawl(
+            url=url,
+            prompt=prompt,
+            cache_website=cache_website,
+            depth=depth,
+            max_pages=max_pages,
+            same_domain_only=same_domain_only,
+            markdown_only=markdown_only
+        )
     except Exception as e:
         return {"error": str(e)}

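A note on the design: every new parameter defaults to None and is copied into the request body only when set, so the MCP tool layer can forward all of its arguments unconditionally without changing the wire format for existing callers. As an illustration, the markdown-only crawl sketched above would post a body like the following (an assumed payload, not captured API traffic):

```python
# Parameters left at None never appear in the request body.
{
    "url": "https://example.com/docs",
    "markdown_only": True,
    "max_pages": 5
}
```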