@@ -32,7 +32,7 @@ def process_request(self, request, spider):
3232 return None
3333
3434 if self .proxycrawl_url not in request .url :
35- new_url = self ._get_proxied_url (request )
35+ new_url = self ._get_proxied_url (request . url , request . query_params_str )
3636 log .debug ('Using ProxyCrawl API, Request overridden with URL: {}' .format (new_url ))
3737 return request .replace (url = new_url )
3838
@@ -46,15 +46,18 @@ def process_response(self, request, response, spider):
4646 log .debug ('Using ProxyCrawl API, Response overridden with URL: {}' .format (request .original_url ))
4747 return response .replace (url = request .original_url )
4848
49- def _get_proxied_url (self , request ):
50- original_url_encoded = quote_plus (request .url , safe = '' )
49+ def _get_proxied_url (self , url , query_params ):
50+ """
51+ Transform the url into a call to proxy crawl api, sending the target url as query parameter.
52+ """
53+ original_url_encoded = quote_plus (url , safe = '' )
5154 proxycrawl_url = self .proxycrawl_url
5255 proxycrawl_token = self .proxycrawl_token
53- proxycrawl_query_params = request . query_params_str # 'country=US&device=desktop&page_wait=5000&ajax_wait=true'
56+ proxycrawl_query_params = query_params
5457 proxied_url = '{}/?token={}&{}&url={}' .format (
5558 proxycrawl_url ,
5659 proxycrawl_token ,
5760 proxycrawl_query_params ,
5861 original_url_encoded
5962 )
60- return proxied_url
63+ return proxied_url
0 commit comments