diff --git a/.github/workflows/code_checks.yaml b/.github/workflows/code_checks.yaml index e59293c..3374c52 100644 --- a/.github/workflows/code_checks.yaml +++ b/.github/workflows/code_checks.yaml @@ -6,7 +6,7 @@ jobs: code_checks: strategy: matrix: - pyver: ['3.7', '3.8', '3.9', '3.10', '3.11'] + pyver: ['3.8', '3.9', '3.10', '3.11', '3.12'] os: [ ubuntu, macos, windows ] fail-fast: true runs-on: ${{ matrix.os }}-latest diff --git a/README.md b/README.md index eadb562..5dbe177 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,11 @@ Main class of this library. * * * -#### ScrapingAntClient.general_request and ScrapingAntClient.general_request_async +#### Common arguments + - ScrapingAntClient.general_request + - ScrapingAntClient.general_request_async + - ScrapingAntClient.markdown_request + - ScrapingAntClient.markdown_request_async https://docs.scrapingant.com/request-response-format#available-parameters @@ -266,6 +270,20 @@ result = client.general_request( print(result.content) ``` +### Receiving markdown + +```python3 +from scrapingant_client import ScrapingAntClient + +client = ScrapingAntClient(token='') + +# Sending a request and receiving the page as markdown +result = client.markdown_request( + url="https://example.com", +) +print(result.markdown) +``` + ## Useful links - [Scrapingant API doumentation](https://docs.scrapingant.com) diff --git a/scrapingant_client/__init__.py b/scrapingant_client/__init__.py index 337a075..e461b86 100644 --- a/scrapingant_client/__init__.py +++ b/scrapingant_client/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.0.1" +__version__ = "2.1.0" from scrapingant_client.client import ScrapingAntClient from scrapingant_client.cookie import Cookie diff --git a/scrapingant_client/client.py b/scrapingant_client/client.py index 1c9536e..c67023e 100644 --- a/scrapingant_client/client.py +++ b/scrapingant_client/client.py @@ -17,7 +17,7 @@ ) from scrapingant_client.headers import convert_headers from scrapingant_client.proxy_type import ProxyType -from 
scrapingant_client.response import Response +from scrapingant_client.response import Response, MarkdownResponse from scrapingant_client.utils import base64_encode_string @@ -60,7 +60,7 @@ def _form_payload( request_data['return_page_source'] = return_page_source return request_data - def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response: + def _check_status_code(self, response_status_code: int, response_data: Dict, url: str) -> None: if response_status_code == 403: raise ScrapingantInvalidTokenException() elif response_status_code == 404: @@ -71,6 +71,8 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s raise ScrapingantDetectedException() elif response_status_code == 500: raise ScrapingantInternalException() + + def _parse_extended_response(self, response_data: Dict) -> Response: content = response_data['html'] cookies_string = response_data['cookies'] text = response_data['text'] @@ -80,10 +82,24 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s content=content, cookies=cookies_list, text=text, - status_code=status_code + status_code=status_code, + ) + + def _parse_markdown_response(self, response_data: Dict) -> MarkdownResponse: + return MarkdownResponse( + url=response_data['url'], + markdown=response_data['markdown'], ) - def general_request( + def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str: + if endpoint is None or endpoint == 'extended': + return SCRAPINGANT_API_BASE_URL + '/extended' + elif endpoint == 'markdown': + return SCRAPINGANT_API_BASE_URL + '/markdown' + else: + raise ValueError(f'Invalid endpoint: {endpoint}, must be either None, "extended" or "markdown"') + + def _request( self, url: str, method: str = 'GET', @@ -97,7 +113,8 @@ def general_request( return_page_source: Optional[bool] = None, data=None, json=None, - ) -> Response: + endpoint: Optional[str] = None, + ) -> Dict: request_data = self._form_payload( url=url, 
cookies=cookies, @@ -111,7 +128,7 @@ def general_request( try: response = self.requests_session.request( method=method, - url=SCRAPINGANT_API_BASE_URL + '/extended', + url=self._get_scrapingant_api_url(endpoint), params=request_data, headers=convert_headers(headers), data=data, @@ -121,10 +138,10 @@ def general_request( raise ScrapingantTimeoutException() response_status_code = response.status_code response_data = response.json() - parsed_response: Response = self._parse_response(response_status_code, response_data, url) - return parsed_response + self._check_status_code(response_status_code, response_data, url) + return response_data - async def general_request_async( + async def _request_async( self, url: str, method: str = 'GET', @@ -138,7 +155,8 @@ async def general_request_async( return_page_source: Optional[bool] = None, data=None, json=None, - ) -> Response: + endpoint: Optional[str] = None, + ) -> Dict: import httpx request_data = self._form_payload( @@ -161,7 +179,7 @@ async def general_request_async( try: response = await client.request( method=method, - url=SCRAPINGANT_API_BASE_URL + '/extended', + url=self._get_scrapingant_api_url(endpoint), params=request_data, headers=convert_headers(headers), data=data, @@ -171,5 +189,21 @@ async def general_request_async( raise ScrapingantTimeoutException() response_status_code = response.status_code response_data = response.json() - parsed_response: Response = self._parse_response(response_status_code, response_data, url) - return parsed_response + self._check_status_code(response_status_code, response_data, url) + return response_data + + def general_request(self, *args, **kwargs) -> Response: + response_data = self._request(*args, **kwargs, endpoint='extended') + return self._parse_extended_response(response_data) + + async def general_request_async(self, *args, **kwargs) -> Response: + response_data = await self._request_async(*args, **kwargs, endpoint='extended') + return 
self._parse_extended_response(response_data) + + def markdown_request(self, *args, **kwargs) -> MarkdownResponse: + response_data = self._request(*args, **kwargs, endpoint='markdown') + return self._parse_markdown_response(response_data) + + async def markdown_request_async(self, *args, **kwargs) -> MarkdownResponse: + response_data = await self._request_async(*args, **kwargs, endpoint='markdown') + return self._parse_markdown_response(response_data) diff --git a/scrapingant_client/response.py b/scrapingant_client/response.py index 90ab279..252751a 100644 --- a/scrapingant_client/response.py +++ b/scrapingant_client/response.py @@ -9,3 +9,9 @@ def __init__(self, content: str, cookies: List[Cookie], text: str, status_code: self.cookies = cookies self.text = text self.status_code = status_code + + +class MarkdownResponse: + def __init__(self, url: str, markdown: str): + self.url = url + self.markdown = markdown diff --git a/setup.py b/setup.py index ef32b2d..e3aad32 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ extras_require={ 'dev': [ 'pytest>=7,<8', - 'flake8>=4,<5', + 'flake8>=7,<8', 'responses>=0,<1', 'pytest-httpx>=0,<1', 'pytest-asyncio>=0,<1',