Skip to content

Commit 0e755cb

Browse files
committed
(33-add-return_page_source-parameter-for-trimming-browser-html): done
1 parent 263d9d4 commit 0e755cb

File tree

4 files changed

+29
-16
lines changed

4 files changed

+29
-16
lines changed

Diff for: README.md

+14-13
Original file line numberDiff line numberDiff line change
@@ -62,19 +62,20 @@ Main class of this library.
6262

6363
https://docs.scrapingant.com/request-response-format#available-parameters
6464

65-
| Param | Type | Default |
66-
|-------------------|----------------------------------------------------------------------------------------------------------------------------|------------|
67-
| url | <code>string</code> | |
68-
| method | <code>string</code> | GET |
69-
| cookies | <code>List[Cookie]</code> | None |
70-
| headers | <code>List[Dict[str, str]]</code> | None |
71-
| js_snippet | <code>string</code> | None |
72-
| proxy_type | <code>ProxyType</code> | datacenter |
73-
| proxy_country | <code>str</code> | None |
74-
| wait_for_selector | <code>str</code> | None |
75-
| browser | <code>boolean</code> | True |
76-
| data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
77-
| json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
65+
| Param | Type | Default |
66+
|---------------------|----------------------------------------------------------------------------------------------------------------------------|------------|
67+
| url | <code>string</code> | |
68+
| method | <code>string</code> | GET |
69+
| cookies | <code>List[Cookie]</code> | None |
70+
| headers | <code>List[Dict[str, str]]</code> | None |
71+
| js_snippet | <code>string</code> | None |
72+
| proxy_type | <code>ProxyType</code> | datacenter |
73+
| proxy_country | <code>str</code> | None |
74+
| wait_for_selector | <code>str</code> | None |
75+
| browser | <code>boolean</code> | True |
76+
| return_page_source | <code>boolean</code> | False |
77+
| data | same as [requests param 'data'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
78+
| json | same as [requests param 'json'](https://requests.readthedocs.io/en/latest/user/quickstart/#more-complicated-post-requests) | None |
7879

7980
**IMPORTANT NOTE:** <code>js_snippet</code> will be encoded to Base64 automatically by the ScrapingAnt client library.
8081

Diff for: scrapingant_client/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "2.0.0"
1+
__version__ = "2.0.1"
22

33
from scrapingant_client.client import ScrapingAntClient
44
from scrapingant_client.cookie import Cookie

Diff for: scrapingant_client/client.py

+8
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ def _form_payload(
4141
proxy_country: Optional[str] = None,
4242
wait_for_selector: Optional[str] = None,
4343
browser: bool = True,
44+
return_page_source: Optional[bool] = None,
4445
) -> Dict:
4546
request_data = {'url': url}
4647
if cookies is not None:
@@ -54,6 +55,9 @@ def _form_payload(
5455
if wait_for_selector is not None:
5556
request_data['wait_for_selector'] = wait_for_selector
5657
request_data['browser'] = browser
58+
if return_page_source:
59+
assert browser, 'return_page_source can only be used with browser=True'
60+
request_data['return_page_source'] = return_page_source
5761
return request_data
5862

5963
def _parse_response(self, response_status_code: int, response_data: Dict, url: str) -> Response:
@@ -90,6 +94,7 @@ def general_request(
9094
proxy_country: Optional[str] = None,
9195
wait_for_selector: Optional[str] = None,
9296
browser: bool = True,
97+
return_page_source: Optional[bool] = None,
9398
data=None,
9499
json=None,
95100
) -> Response:
@@ -101,6 +106,7 @@ def general_request(
101106
proxy_country=proxy_country,
102107
wait_for_selector=wait_for_selector,
103108
browser=browser,
109+
return_page_source=return_page_source,
104110
)
105111
try:
106112
response = self.requests_session.request(
@@ -129,6 +135,7 @@ async def general_request_async(
129135
proxy_country: Optional[str] = None,
130136
wait_for_selector: Optional[str] = None,
131137
browser: bool = True,
138+
return_page_source: Optional[bool] = None,
132139
data=None,
133140
json=None,
134141
) -> Response:
@@ -142,6 +149,7 @@ async def general_request_async(
142149
proxy_country=proxy_country,
143150
wait_for_selector=wait_for_selector,
144151
browser=browser,
152+
return_page_source=return_page_source,
145153
)
146154
async with httpx.AsyncClient(
147155
headers={

Diff for: tests/test_integration.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ def test_integration():
2121
'&proxy_type=datacenter'
2222
'&proxy_country=test_country'
2323
'&wait_for_selector=test_selector'
24-
'&browser=True',
24+
'&browser=True'
25+
'&return_page_source=True',
2526
json={
2627
"html": "test_content",
2728
"cookies": "test_key1=test_value1;test_key2=test_value2",
@@ -40,6 +41,7 @@ def test_integration():
4041
proxy_country='test_country',
4142
wait_for_selector='test_selector',
4243
browser=True,
44+
return_page_source=True,
4345
)
4446
expected = {
4547
'content': 'test_content',
@@ -68,7 +70,8 @@ async def test_integration_async(httpx_mock: HTTPXMock):
6870
'&proxy_type=datacenter'
6971
'&proxy_country=test_country'
7072
'&wait_for_selector=test_selector'
71-
'&browser=true',
73+
'&browser=true'
74+
'&return_page_source=true',
7275
json={
7376
"html": "test_content",
7477
"cookies": "test_key1=test_value1;test_key2=test_value2",
@@ -87,6 +90,7 @@ async def test_integration_async(httpx_mock: HTTPXMock):
8790
proxy_country='test_country',
8891
wait_for_selector='test_selector',
8992
browser=True,
93+
return_page_source=True,
9094
)
9195
expected = {
9296
'content': 'test_content',

0 commit comments

Comments
 (0)