@@ -56,7 +56,7 @@ def _form_payload(
56
56
request_data ['browser' ] = browser
57
57
return request_data
58
58
59
- def _parse_response (self , response_status_code : int , response_data : Dict , url : str ) -> Response :
59
+ def _parse_response (self , response_status_code : int , response_data : Dict , url : str , endpoint : str ) -> Response :
60
60
if response_status_code == 403 :
61
61
raise ScrapingantInvalidTokenException ()
62
62
elif response_status_code == 404 :
@@ -67,17 +67,33 @@ def _parse_response(self, response_status_code: int, response_data: Dict, url: s
67
67
raise ScrapingantDetectedException ()
68
68
elif response_status_code == 500 :
69
69
raise ScrapingantInternalException ()
70
- content = response_data ['html' ]
71
- cookies_string = response_data ['cookies' ]
72
- text = response_data ['text' ]
73
- status_code = response_data ['status_code' ]
74
- cookies_list = cookies_list_from_string (cookies_string )
75
- return Response (
76
- content = content ,
77
- cookies = cookies_list ,
78
- text = text ,
79
- status_code = status_code
80
- )
70
+ if endpoint is None or endpoint == 'extended' :
71
+ content = response_data ['html' ]
72
+ cookies_string = response_data ['cookies' ]
73
+ text = response_data ['text' ]
74
+ status_code = response_data ['status_code' ]
75
+ cookies_list = cookies_list_from_string (cookies_string )
76
+ return Response (
77
+ content = content ,
78
+ cookies = cookies_list ,
79
+ text = text ,
80
+ status_code = status_code
81
+ )
82
+ elif endpoint == 'markdown' :
83
+ return Response (
84
+ content = '' ,
85
+ cookies = [],
86
+ text = response_data ['markdown' ],
87
+ status_code = 0 ,
88
+ )
89
+
90
def _get_scrapingant_api_url(self, endpoint: Optional[str] = None) -> str:
    """Return the ScrapingAnt API URL for the requested endpoint.

    Args:
        endpoint: ``None`` or ``'extended'`` selects the ``/extended``
            endpoint; ``'markdown'`` selects the ``/markdown`` endpoint.

    Returns:
        ``SCRAPINGANT_API_BASE_URL`` with the endpoint path appended.

    Raises:
        ValueError: If ``endpoint`` is not one of the accepted values.
    """
    if endpoint is None or endpoint == 'extended':
        return SCRAPINGANT_API_BASE_URL + '/extended'
    if endpoint == 'markdown':
        return SCRAPINGANT_API_BASE_URL + '/markdown'
    # 'extended' is accepted above, so the message has to list it as valid too.
    raise ValueError(
        f'Invalid endpoint: {endpoint}, must be either None, "extended" or "markdown"'
    )
81
97
82
98
def general_request (
83
99
self ,
@@ -92,6 +108,7 @@ def general_request(
92
108
browser : bool = True ,
93
109
data = None ,
94
110
json = None ,
111
+ endpoint : Optional [str ] = None ,
95
112
) -> Response :
96
113
request_data = self ._form_payload (
97
114
url = url ,
@@ -105,7 +122,7 @@ def general_request(
105
122
try :
106
123
response = self .requests_session .request (
107
124
method = method ,
108
- url = SCRAPINGANT_API_BASE_URL + '/extended' ,
125
+ url = self . _get_scrapingant_api_url ( endpoint ) ,
109
126
params = request_data ,
110
127
headers = convert_headers (headers ),
111
128
data = data ,
@@ -115,7 +132,7 @@ def general_request(
115
132
raise ScrapingantTimeoutException ()
116
133
response_status_code = response .status_code
117
134
response_data = response .json ()
118
- parsed_response : Response = self ._parse_response (response_status_code , response_data , url )
135
+ parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
119
136
return parsed_response
120
137
121
138
async def general_request_async (
@@ -131,6 +148,7 @@ async def general_request_async(
131
148
browser : bool = True ,
132
149
data = None ,
133
150
json = None ,
151
+ endpoint : Optional [str ] = None ,
134
152
) -> Response :
135
153
import httpx
136
154
@@ -153,7 +171,7 @@ async def general_request_async(
153
171
try :
154
172
response = await client .request (
155
173
method = method ,
156
- url = SCRAPINGANT_API_BASE_URL + '/extended' ,
174
+ url = self . _get_scrapingant_api_url ( endpoint ) ,
157
175
params = request_data ,
158
176
headers = convert_headers (headers ),
159
177
data = data ,
@@ -163,5 +181,5 @@ async def general_request_async(
163
181
raise ScrapingantTimeoutException ()
164
182
response_status_code = response .status_code
165
183
response_data = response .json ()
166
- parsed_response : Response = self ._parse_response (response_status_code , response_data , url )
184
+ parsed_response : Response = self ._parse_response (response_status_code , response_data , url , endpoint )
167
185
return parsed_response
0 commit comments