Skip to content

Commit e4060e0

Browse files
authored
Merge pull request #29 from ScrapingAnt/feature/issue28-handle-timeouts
feature/issue28-handle-timeouts: done
2 parents 05f3948 + b4df603 commit e4060e0

File tree

6 files changed

+74
-41
lines changed

6 files changed

+74
-41
lines changed

README.md

+29-28
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ All public classes, methods and their parameters can be inspected in this API re
5252

5353
Main class of this library.
5454

55-
| Param | Type |
56-
| --- | --- |
55+
| Param | Type |
56+
|-------|---------------------|
5757
| token | <code>string</code> |
5858

5959
* * *
@@ -62,17 +62,17 @@ Main class of this library.
6262

6363
https://docs.scrapingant.com/request-response-format#available-parameters
6464

65-
| Param | Type | Default |
66-
| --- | --- | --- |
67-
| url | <code>string</code> | |
68-
| cookies | <code>List[Cookie]</code> | None |
69-
| headers | <code>List[Dict[str, str]]</code> | None |
70-
| js_snippet | <code>string</code> | None |
71-
| proxy_type | <code>ProxyType</code> | datacenter |
72-
| proxy_country | <code>str</code> | None |
73-
| return_text | <code>boolean</code> | False |
74-
| wait_for_selector | <code>str</code> | None |
75-
| browser | <code>boolean</code> | True |
65+
| Param | Type | Default |
66+
|-------------------|-----------------------------------|------------|
67+
| url | <code>string</code> | |
68+
| cookies | <code>List[Cookie]</code> | None |
69+
| headers | <code>List[Dict[str, str]]</code> | None |
70+
| js_snippet | <code>string</code> | None |
71+
| proxy_type | <code>ProxyType</code> | datacenter |
72+
| proxy_country | <code>str</code> | None |
73+
| return_text | <code>boolean</code> | False |
74+
| wait_for_selector | <code>str</code> | None |
75+
| browser | <code>boolean</code> | True |
7676

7777
**IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.
7878

@@ -82,9 +82,9 @@ https://docs.scrapingant.com/request-response-format#available-parameters
8282

8383
Class defining a cookie. Currently it supports only name and value.
8484

85-
| Param | Type |
86-
| --- | --- |
87-
| name | <code>string</code> |
85+
| Param | Type |
86+
|-------|---------------------|
87+
| name | <code>string</code> |
8888
| value | <code>string</code> |
8989

9090
* * *
@@ -93,23 +93,24 @@ Class defining cookie. Currently it supports only name and value
9393

9494
Class defining response from API.
9595

96-
| Param | Type |
97-
| --- | --- |
98-
| content | <code>string</code> |
99-
| cookies | <code>List[Cookie]</code> |
100-
| status_code | <code>int</code> |
96+
| Param | Type |
97+
|-------------|---------------------------|
98+
| content | <code>string</code> |
99+
| cookies | <code>List[Cookie]</code> |
100+
| status_code | <code>int</code> |
101101

102102
## Exceptions
103103

104104
`ScrapingantClientException` is the base exception class, used for all errors.
105105

106-
| Exception | Reason |
107-
| --- | --- |
108-
| ScrapingantInvalidTokenException | The API token is wrong or you have exceeded the API calls request limit
109-
| ScrapingantInvalidInputException | Invalid value provided. Please, look into error message for more info |
110-
| ScrapingantInternalException | Something went wrong with the server side code. Try again later or contact ScrapingAnt support |
111-
| ScrapingantSiteNotReachableException | The requested URL is not reachable. Please, check it locally |
112-
| ScrapingantDetectedException | The anti-bot detection system has detected the request. Please, retry or change the request settings. |
106+
| Exception | Reason |
107+
|--------------------------------------|------------------------------------------------------------------------------------------------------------------------------|
108+
| ScrapingantInvalidTokenException | The API token is wrong or you have exceeded the API calls request limit |
109+
| ScrapingantInvalidInputException | Invalid value provided. Please, look into error message for more info |
110+
| ScrapingantInternalException | Something went wrong with the server side code. Try again later or contact ScrapingAnt support |
111+
| ScrapingantSiteNotReachableException | The requested URL is not reachable. Please, check it locally |
112+
| ScrapingantDetectedException | The anti-bot detection system has detected the request. Please, retry or change the request settings. |
113+
| ScrapingantTimeoutException | Got timeout while communicating with Scrapingant servers. Check your network connection. Please try later or contact support |
113114

114115
* * *
115116

scrapingant_client/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.0.0"
1+
__version__ = "1.0.1"
22

33
from scrapingant_client.client import ScrapingAntClient
44
from scrapingant_client.cookie import Cookie
@@ -9,6 +9,7 @@
99
ScrapingantInternalException,
1010
ScrapingantSiteNotReachableException,
1111
ScrapingantDetectedException,
12+
ScrapingantTimeoutException,
1213
)
1314
from scrapingant_client.proxy_type import ProxyType
1415
from scrapingant_client.response import Response
@@ -23,5 +24,6 @@
2324
'ScrapingantInternalException',
2425
'ScrapingantSiteNotReachableException',
2526
'ScrapingantDetectedException',
27+
'ScrapingantTimeoutException',
2628
'Response',
2729
]

scrapingant_client/client.py

+22-12
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
import requests
66

77
import scrapingant_client
8-
from scrapingant_client.constants import SCRAPINGANT_API_BASE_URL
8+
from scrapingant_client.constants import SCRAPINGANT_API_BASE_URL, TIMEOUT_SECONDS
99
from scrapingant_client.cookie import Cookie, cookies_list_to_string, cookies_list_from_string
1010
from scrapingant_client.errors import (
1111
ScrapingantInvalidTokenException,
1212
ScrapingantInvalidInputException,
1313
ScrapingantInternalException,
1414
ScrapingantSiteNotReachableException,
1515
ScrapingantDetectedException,
16+
ScrapingantTimeoutException,
1617
)
1718
from scrapingant_client.headers import convert_headers
1819
from scrapingant_client.proxy_type import ProxyType
@@ -100,11 +101,15 @@ def general_request(
100101
wait_for_selector=wait_for_selector,
101102
browser=browser,
102103
)
103-
response = self.requests_session.post(
104-
SCRAPINGANT_API_BASE_URL + '/general',
105-
json=request_data,
106-
headers=convert_headers(headers),
107-
)
104+
try:
105+
response = self.requests_session.post(
106+
SCRAPINGANT_API_BASE_URL + '/general',
107+
json=request_data,
108+
headers=convert_headers(headers),
109+
timeout=TIMEOUT_SECONDS
110+
)
111+
except requests.exceptions.Timeout:
112+
raise ScrapingantTimeoutException()
108113
response_status_code = response.status_code
109114
response_data = response.json()
110115
parsed_response: Response = self._parse_response(response_status_code, response_data, url)
@@ -138,13 +143,18 @@ async def general_request_async(
138143
headers={
139144
'x-api-key': self.token,
140145
'User-Agent': self.user_agent,
141-
}
146+
},
147+
timeout=TIMEOUT_SECONDS,
142148
) as client:
143-
response = await client.post(
144-
SCRAPINGANT_API_BASE_URL + '/general',
145-
json=request_data,
146-
headers=convert_headers(headers),
147-
)
149+
try:
150+
response = await client.post(
151+
SCRAPINGANT_API_BASE_URL + '/general',
152+
json=request_data,
153+
headers=convert_headers(headers),
154+
)
155+
except httpx.TimeoutException:
156+
raise ScrapingantTimeoutException()
157+
148158
response_status_code = response.status_code
149159
response_data = response.json()
150160
parsed_response: Response = self._parse_response(response_status_code, response_data, url)

scrapingant_client/constants.py

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
SCRAPINGANT_API_BASE_URL = 'https://api.scrapingant.com/v1'
2+
TIMEOUT_SECONDS = 120

scrapingant_client/errors.py

+7
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,10 @@ class ScrapingantInternalException(ScrapingantClientException):
3030
def __init__(self):
3131
message = 'Something went wrong with the server side. Please try later or contact support'
3232
super().__init__(message)
33+
34+
35+
class ScrapingantTimeoutException(ScrapingantClientException):
    """Error raised with a fixed message describing a communication timeout."""

    def __init__(self):
        # Adjacent literals are joined at compile time into one message.
        super().__init__(
            'Got timeout while communicating with Scrapingant servers.'
            ' Check your network connection. Please try later or contact support'
        )

tests/test_exceptions.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pytest
2+
import requests
23
import responses
34

45
from scrapingant_client import (
@@ -8,6 +9,7 @@
89
ScrapingantInternalException,
910
ScrapingantSiteNotReachableException,
1011
ScrapingantDetectedException,
12+
ScrapingantTimeoutException,
1113
)
1214
from scrapingant_client.constants import SCRAPINGANT_API_BASE_URL
1315

@@ -58,3 +60,13 @@ def test_detected():
5860
with pytest.raises(ScrapingantDetectedException) as e:
5961
client.general_request('example.com')
6062
assert 'The anti-bot detection system has detected the request' in str(e)
63+
64+
65+
@responses.activate
def test_timeout():
    """A read timeout on the API call is reported as ScrapingantTimeoutException."""
    # Register the mocked endpoint so that the POST raises ReadTimeout
    # instead of returning a response.
    responses.add(
        responses.POST,
        SCRAPINGANT_API_BASE_URL + '/general',
        body=requests.exceptions.ReadTimeout(),
    )
    client = ScrapingAntClient(token='some_token')
    with pytest.raises(ScrapingantTimeoutException) as e:
        client.general_request('example.com')
    # Check the raised exception's own message via ``e.value``; str() of the
    # ExceptionInfo wrapper is a pytest-version-dependent representation.
    assert 'Got timeout' in str(e.value)

0 commit comments

Comments
 (0)