diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 4e4138a..6b2e742 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -52,6 +52,7 @@ def __init__( self._http = http self._form_submitter = form self._resource_renderer = resource_renderer + self._cookies = None self._form_variables = {} def notify_scrape_exception(self): @@ -65,7 +66,7 @@ async def get_content(self) -> str: if self._form_submitter: try: - result = await self._form_submitter.async_submit(resource) + result, self._cookies = await self._form_submitter.async_submit(resource) self._form_variables = self._form_submitter.scrape_variables() if result: @@ -81,7 +82,7 @@ async def get_content(self) -> str: ex, ) - response = await self._http.async_request("page", resource, variables=self._form_variables) + response = await self._http.async_request("page", resource, cookies=self._cookies, variables=self._form_variables) return response.text @property diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index a53481b..0f54d18 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -87,6 +87,7 @@ def __init__( self._scraper = scraper self._parser = parser self._should_submit = True + self._cookies = None def notify_scrape_exception(self): """Make sure form is re-submitted after an exception.""" @@ -152,6 +153,7 @@ async def async_submit(self, main_resource): submit_resource, method=method, request_data=input_fields, + cookies=self._cookies ) _LOGGER.debug( "%s # Form seems to be submitted successfully (to be sure, use log_response and check file). Now continuing to retrieve target page.", @@ -165,9 +167,9 @@ async def async_submit(self, main_resource): await self._scraper.set_content(response.text) if not self._form_resource: - return response.text + return response.text, response.cookies else: - return None + return None, response.cookies def scrape_variables(self): """Scrape header mappings.""" @@ -203,6 +205,7 @@ async def _fetch_form_page(self, resource): resource, "GET", ) + self._cookies = response.cookies return response.text def _get_input_fields(self, form): diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index 6ed772c..4e94b0c 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -1,4 +1,5 @@ """HTTP request related functionality.""" +import asyncio import logging from collections.abc import Callable @@ -80,7 +81,7 @@ def set_authentication(self, username, password, auth_type): _LOGGER.debug( "%s # Authentication configuration processed", self._config_name) - async def async_request(self, context, resource, method=None, request_data=None, variables: dict = {}): + async def async_request(self, context, resource, method=None, request_data=None, cookies=None, variables: dict = {}): """Execute a HTTP request.""" data = request_data or self._data_renderer(variables) method = method or self._method or "GET" @@ -88,16 +89,19 @@ async def async_request(self, context, resource, method=None, request_data=None, params = self._params_renderer(variables) _LOGGER.debug( - "%s # Executing %s-request with a %s to url: %s with headers: %s.", + "%s # Executing %s-request with a %s to url: %s with headers: %s and cookies: %s.", self._config_name, context, method, resource, headers, + cookies ) if self._file_manager: - await self._async_file_log("request_headers", context, headers) - await self._async_file_log("request_body", context, data) + task1 = self._async_file_log("request_headers", context, headers) + task2 = self._async_file_log("request_body", context, data) + task3 = self._async_file_log("request_cookies", context, cookies) + await asyncio.gather(task1, task2, task3) response = None @@ -111,6 +115,7 @@ async def async_request(self, context, resource, method=None, request_data=None, data=data, timeout=self._timeout, follow_redirects=True, + cookies=cookies ) _LOGGER.debug( @@ -119,10 +124,12 @@ async def async_request(self, context, resource, method=None, request_data=None, response.status_code, ) if self._file_manager: - await self._async_file_log( + task1 = self._async_file_log( "response_headers", context, response.headers ) - await self._async_file_log("response_body", context, response.text) + task2 = self._async_file_log("response_body", context, response.text) + task3 = self._async_file_log("response_cookies", context, response.cookies) + await asyncio.gather(task1, task2, task3) # bit of a hack since httpx also raises an exception for redirects: https://github.com/encode/httpx/blob/c6c8cb1fe2da9380f8046a19cdd5aade586f69c8/CHANGELOG.md#0200-13th-october-2021 if 400 <= response.status_code <= 599: @@ -162,36 +169,42 @@ async def async_request(self, context, resource, method=None, request_data=None, async def _handle_request_exception(self, context, response): try: if self._file_manager: - await self._async_file_log( + task1 = self._async_file_log( "response_headers_error", context, response.headers ) - await self._async_file_log( + task2 = self._async_file_log( "response_body_error", context, response.text ) + task3 = self._async_file_log( + "response_cookies_error", context, response.cookies + ) + await asyncio.gather(task1, task2, task3) except Exception as exc: _LOGGER.debug( - "%s # Unable to write headers and body to files during handling of exception.\n Error message:\n %s", + "%s # Unable to write headers, cookies and/or body to file during handling of exception.\n Error message:\n %s", self._config_name, repr(exc), ) async def _async_file_log(self, content_name, context, content): - try: - filename = f"{context}_{content_name}.txt" - await self._hass.async_add_executor_job( - self._file_manager.write, filename, content - ) - except Exception as ex: - _LOGGER.error( - "%s # Unable to write %s to file: %s. \nException: %s", + """Write content to a file if content is not None.""" + if content is not None: + try: + filename = f"{context}_{content_name}.txt" + await self._hass.async_add_executor_job( + self._file_manager.write, filename, content + ) + except Exception as ex: + _LOGGER.error( + "%s # Unable to write %s to file: %s. \nException: %s", + self._config_name, + content_name, + filename, + ex, + ) + _LOGGER.debug( + "%s # %s written to file: %s", self._config_name, content_name, filename, - ex, ) - _LOGGER.debug( - "%s # %s written to file: %s", - self._config_name, - content_name, - filename, - )