Skip to content

feature/added-markdown: done #36

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/code_checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
code_checks:
strategy:
matrix:
pyver: ['3.7', '3.8', '3.9', '3.10', '3.11']
pyver: ['3.8', '3.9', '3.10', '3.11', '3.12']
os: [ ubuntu, macos, windows ]
fail-fast: true
runs-on: ${{ matrix.os }}-latest
Expand Down
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ Main class of this library.

* * *

#### ScrapingAntClient.general_request and ScrapingAntClient.general_request_async
#### Common arguments
- ScrapingAntClient.general_request
- ScrapingAntClient.general_request_async
- ScrapingAntClient.markdown_request
- ScrapingAntClient.markdown_request_async

https://docs.scrapingant.com/request-response-format#available-parameters

Expand Down Expand Up @@ -266,6 +270,20 @@ result = client.general_request(
print(result.content)
```

### Receiving markdown

```python3
from scrapingant_client import ScrapingAntClient

client = ScrapingAntClient(token='<YOUR-SCRAPINGANT-API-TOKEN>')

# Requesting the page content converted to markdown
result = client.markdown_request(
url="https://example.com",
)
print(result.markdown)
```

## Useful links

- [Scrapingant API documentation](https://docs.scrapingant.com)
Expand Down
2 changes: 1 addition & 1 deletion scrapingant_client/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.0.1"
__version__ = "2.1.0"

from scrapingant_client.client import ScrapingAntClient
from scrapingant_client.cookie import Cookie
Expand Down
60 changes: 47 additions & 13 deletions scrapingant_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from scrapingant_client.headers import convert_headers
from scrapingant_client.proxy_type import ProxyType
from scrapingant_client.response import Response
from scrapingant_client.response import Response, MarkdownResponse
from scrapingant_client.utils import base64_encode_string


Expand Down Expand Up @@ -60,7 +60,7 @@ def _form_payload(
request_data['return_page_source'] = return_page_source
return request_data

def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
def _check_status_code(self, response_status_code: int, response_data: Dict, url: str) -> None:
if response_status_code == 403:
raise ScrapingantInvalidTokenException()
elif response_status_code == 404:
Expand All @@ -71,6 +71,8 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
raise ScrapingantDetectedException()
elif response_status_code == 500:
raise ScrapingantInternalException()

def _parse_extended_response(self, response_data: Dict) -> Response:
content = response_data['html']
cookies_string = response_data['cookies']
text = response_data['text']
Expand All @@ -80,10 +82,24 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
content=content,
cookies=cookies_list,
text=text,
status_code=status_code
status_code=status_code,
)

def _parse_markdown_response(self, response_data: Dict) -> MarkdownResponse:
    """Build a MarkdownResponse from the raw /markdown endpoint payload."""
    page_url = response_data['url']
    page_markdown = response_data['markdown']
    return MarkdownResponse(url=page_url, markdown=page_markdown)

def general_request(
def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
if endpoint is None or endpoint == 'extended':
return SCRAPINGANT_API_BASE_URL + '/extended'
elif endpoint == 'markdown':
return SCRAPINGANT_API_BASE_URL + '/markdown'
else:
raise ValueError(f'Invalid endpoint: {endpoint}, must be either None or "markdown"')

def _request(
self,
url: str,
method: str = 'GET',
Expand All @@ -97,7 +113,8 @@ def general_request(
return_page_source: Optional[bool] = None,
data=None,
json=None,
) -> Response:
endpoint: Optional[str] = None,
) -> Dict:
request_data = self._form_payload(
url=url,
cookies=cookies,
Expand All @@ -111,7 +128,7 @@ def general_request(
try:
response = self.requests_session.request(
method=method,
url=SCRAPINGANT_API_BASE_URL + '/extended',
url=self._get_scrapingant_api_url(endpoint),
params=request_data,
headers=convert_headers(headers),
data=data,
Expand All @@ -121,10 +138,10 @@ def general_request(
raise ScrapingantTimeoutException()
response_status_code = response.status_code
response_data = response.json()
parsed_response: Response = self._parse_response(response_status_code, response_data, url)
return parsed_response
self._check_status_code(response_status_code, response_data, url)
return response_data

async def general_request_async(
async def _request_async(
self,
url: str,
method: str = 'GET',
Expand All @@ -138,7 +155,8 @@ async def general_request_async(
return_page_source: Optional[bool] = None,
data=None,
json=None,
) -> Response:
endpoint: Optional[str] = None,
) -> Dict:
import httpx

request_data = self._form_payload(
Expand All @@ -161,7 +179,7 @@ async def general_request_async(
try:
response = await client.request(
method=method,
url=SCRAPINGANT_API_BASE_URL + '/extended',
url=self._get_scrapingant_api_url(endpoint),
params=request_data,
headers=convert_headers(headers),
data=data,
Expand All @@ -171,5 +189,21 @@ async def general_request_async(
raise ScrapingantTimeoutException()
response_status_code = response.status_code
response_data = response.json()
parsed_response: Response = self._parse_response(response_status_code, response_data, url)
return parsed_response
self._check_status_code(response_status_code, response_data, url)
return response_data

def general_request(self, *args, **kwargs) -> Response:
    """Scrape synchronously via the /extended endpoint and return a parsed Response."""
    return self._parse_extended_response(
        self._request(*args, **kwargs, endpoint='extended'),
    )

async def general_request_async(self, *args, **kwargs) -> Response:
    """Scrape asynchronously via the /extended endpoint and return a parsed Response."""
    raw_data = await self._request_async(*args, **kwargs, endpoint='extended')
    return self._parse_extended_response(raw_data)

def markdown_request(self, *args, **kwargs) -> MarkdownResponse:
    """Scrape synchronously via the /markdown endpoint and return a MarkdownResponse."""
    return self._parse_markdown_response(
        self._request(*args, **kwargs, endpoint='markdown'),
    )

async def markdown_request_async(self, *args, **kwargs) -> MarkdownResponse:
    """Scrape asynchronously via the /markdown endpoint and return a MarkdownResponse."""
    raw_data = await self._request_async(*args, **kwargs, endpoint='markdown')
    return self._parse_markdown_response(raw_data)
6 changes: 6 additions & 0 deletions scrapingant_client/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,9 @@ def __init__(self, content: str, cookies: List[Cookie], text: str, status_code:
self.cookies = cookies
self.text = text
self.status_code = status_code


class MarkdownResponse:
    """Result of a /markdown scrape: the requested URL plus the page as markdown."""

    def __init__(self, url: str, markdown: str):
        self.url = url  # URL that was scraped
        self.markdown = markdown  # page content converted to markdown

    def __repr__(self) -> str:
        # Added for debuggability; markdown body is omitted as it can be large.
        return f'{type(self).__name__}(url={self.url!r})'
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
extras_require={
'dev': [
'pytest>=7,<8',
'flake8>=4,<5',
'flake8>=7,<8',
'responses>=0,<1',
'pytest-httpx>=0,<1',
'pytest-asyncio>=0,<1',
Expand Down
Loading