Merged
Changes from 3 commits
2 changes: 1 addition & 1 deletion .github/workflows/code_checks.yaml
@@ -6,7 +6,7 @@ jobs:
  code_checks:
    strategy:
      matrix:
-        pyver: ['3.7', '3.8', '3.9', '3.10', '3.11']
+        pyver: ['3.8', '3.9', '3.10', '3.11', '3.12']
        os: [ ubuntu, macos, windows ]
      fail-fast: true
    runs-on: ${{ matrix.os }}-latest
16 changes: 16 additions & 0 deletions README.md
@@ -76,6 +76,7 @@ https://docs.scrapingant.com/request-response-format#available-parameters
| return_page_source | <code>boolean</code> | False |
| data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
| json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
+| endpoint | None or 'markdown' | None |

**IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.

@@ -266,6 +267,21 @@ result = client.general_request(
print(result.content)
```

+### Receiving markdown
+
+```python3
+from scrapingant_client import ScrapingAntClient
+
+client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')
+
+# Requesting the page content converted to markdown
+result = client.general_request(
+    url="https://example.com",
+    endpoint='markdown',
+)
+print(result.text)
+```
+
## Useful links

- [ScrapingAnt API documentation](https://docs.scrapingant.com)
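For reference, this PR wires the same `endpoint` parameter into `general_request_async` as well, so an equivalent asynchronous call would look roughly like the sketch below. This is not part of the diff; it assumes the `httpx` dependency that the async path imports is installed.

```python3
import asyncio

from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

async def main():
    # endpoint='markdown' routes the call to the /markdown API endpoint;
    # the converted page lands in result.text (result.content stays empty)
    result = await client.general_request_async(
        url="https://example.com",
        endpoint='markdown',
    )
    print(result.text)

asyncio.run(main())
```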
2 changes: 1 addition & 1 deletion scrapingant_client/__init__.py
@@ -1,4 +1,4 @@
__version__ = "2.0.1"
__version__ = "2.1.0"

from scrapingant_client.client import ScrapingAntClient
from scrapingant_client.cookie import Cookie
50 changes: 34 additions & 16 deletions scrapingant_client/client.py
@@ -60,7 +60,7 @@ def _form_payload(
            request_data['return_page_source'] = return_page_source
        return request_data

-    def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
+    def _parse_response(self, response_status_code: int, response_data: Dict, url: str, endpoint: Optional[str]) -> Response:
        if response_status_code == 403:
            raise ScrapingantInvalidTokenException()
        elif response_status_code == 404:
@@ -71,17 +71,33 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
            raise ScrapingantDetectedException()
        elif response_status_code == 500:
            raise ScrapingantInternalException()
-        content = response_data['html']
-        cookies_string = response_data['cookies']
-        text = response_data['text']
-        status_code = response_data['status_code']
-        cookies_list = cookies_list_from_string(cookies_string)
-        return Response(
-            content=content,
-            cookies=cookies_list,
-            text=text,
-            status_code=status_code
-        )
+        if endpoint is None or endpoint == 'extended':
+            content = response_data['html']
+            cookies_string = response_data['cookies']
+            text = response_data['text']
+            status_code = response_data['status_code']
+            cookies_list = cookies_list_from_string(cookies_string)
+            return Response(
+                content=content,
+                cookies=cookies_list,
+                text=text,
+                status_code=status_code
+            )
+        elif endpoint == 'markdown':
+            return Response(
+                content='',
+                cookies=[],
+                text=response_data['markdown'],
+                status_code=0,
+            )

+    def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
+        if endpoint is None or endpoint == 'extended':
+            return SCRAPINGANT_API_BASE_URL + '/extended'
+        elif endpoint == 'markdown':
+            return SCRAPINGANT_API_BASE_URL + '/markdown'
+        else:
+            raise ValueError(f'Invalid endpoint: {endpoint}, must be None, "extended" or "markdown"')
+
    def general_request(
            self,
@@ -97,6 +113,7 @@ def general_request(
            return_page_source: Optional[bool] = None,
            data=None,
            json=None,
+            endpoint: Optional[str] = None,
    ) -> Response:
        request_data = self._form_payload(
            url=url,
@@ -111,7 +128,7 @@ def general_request(
        try:
            response = self.requests_session.request(
                method=method,
-                url=SCRAPINGANT_API_BASE_URL + '/extended',
+                url=self._get_scrapingant_api_url(endpoint),
                params=request_data,
                headers=convert_headers(headers),
                data=data,
@@ -121,7 +138,7 @@ def general_request(
            raise ScrapingantTimeoutException()
        response_status_code = response.status_code
        response_data = response.json()
-        parsed_response: Response = self._parse_response(response_status_code, response_data, url)
+        parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
        return parsed_response

    async def general_request_async(
@@ -138,6 +155,7 @@ async def general_request_async(
            return_page_source: Optional[bool] = None,
            data=None,
            json=None,
+            endpoint: Optional[str] = None,
    ) -> Response:
        import httpx

@@ -161,7 +179,7 @@ async def general_request_async(
            try:
                response = await client.request(
                    method=method,
-                    url=SCRAPINGANT_API_BASE_URL + '/extended',
+                    url=self._get_scrapingant_api_url(endpoint),
                    params=request_data,
                    headers=convert_headers(headers),
                    data=data,
@@ -171,5 +189,5 @@ async def general_request_async(
                raise ScrapingantTimeoutException()
            response_status_code = response.status_code
            response_data = response.json()
-            parsed_response: Response = self._parse_response(response_status_code, response_data, url)
+            parsed_response: Response = self._parse_response(response_status_code, response_data, url, endpoint)
            return parsed_response
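Taken together, the two new pieces compose cleanly: `_get_scrapingant_api_url` picks the API URL from the `endpoint` argument, and `_parse_response` maps `response_data['markdown']` into `Response.text`. A minimal sketch of how the new branch could be exercised with the `responses` dev dependency declared in setup.py follows; it is not part of the PR, and the `scrapingant_client.constants` import path and the default GET method are assumptions.

```python3
import responses

from scrapingant_client import ScrapingAntClient
# Assumption: the base URL constant lives in scrapingant_client.constants
from scrapingant_client.constants import SCRAPINGANT_API_BASE_URL


@responses.activate
def test_markdown_endpoint_returns_text():
    # Stub the /markdown URL that _get_scrapingant_api_url routes to
    responses.add(
        responses.GET,  # assumes general_request defaults to GET
        SCRAPINGANT_API_BASE_URL + '/markdown',
        json={'markdown': '# Example Domain'},
        status=200,
    )
    client = ScrapingAntClient(token='test-token')
    result = client.general_request(url='https://example.com', endpoint='markdown')
    assert result.text == '# Example Domain'
    assert result.content == ''  # markdown responses carry no HTML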
2 changes: 1 addition & 1 deletion setup.py
@@ -39,7 +39,7 @@
    extras_require={
        'dev': [
            'pytest>=7,<8',
-            'flake8>=4,<5',
+            'flake8>=7,<8',
            'responses>=0,<1',
            'pytest-httpx>=0,<1',
            'pytest-asyncio>=0,<1',