From 2b930628984174aa1a6d4652be15fe367e0f4288 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sat, 3 Feb 2024 19:02:41 +0000 Subject: [PATCH 01/32] [Initial merge with master after manual rebase] Adding support for header mappings in form submit. --- README.md | 45 +++++++++++++++-------- custom_components/multiscrape/__init__.py | 12 +++++- custom_components/multiscrape/const.py | 1 + custom_components/multiscrape/form.py | 12 ++++++ custom_components/multiscrape/http.py | 6 +++ custom_components/multiscrape/schema.py | 17 +++++++++ 6 files changed, 76 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index cd6bc27..334dde3 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,7 @@ Based on latest (pre) release. | authentication | Configure HTTP authentication. `basic` or `digest`. Use this with username and password fields. | False | | string | | username | The username for accessing the url. | False | | string | | password | The password for accessing the url. | False | | string | -| headers | The headers for the requests. | False | | template - list | +| headers | The headers for the requests. | False | | template - list | | params | The query params for the requests. | False | | template - list | | method | The method for the request. Either `POST` or `GET`. | False | GET | string | | payload | Optional payload to send with a POST request. | False | | string | @@ -109,10 +109,7 @@ Configure the sensors that will scrape the data. | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | | unique_id | Will be used as entity_id and enables editing the entity in the UI | False | | string | | name | Friendly name for the sensor | False | | string | -| select | CSS selector used for retrieving the value of the sensor. 
Only required when `select_list` is not provided. | True | | string/template | -| select_list | CSS selector for multiple values of multiple elements which will be returned as csv. Only required when `select` is not provided. | True | | string/template | -| attribute | Attribute from the selected element to read as value | False | | string | -| value_template | Defines a template applied on the result of the selector to extract the value. For binary sensors, the sensor is on if the template evaluates as True | False | | string/template | +| | Shared fields from the [Selector](#Selector). | True | | | | attributes | See [Sensor attributes](#sensor-attributes) | False | | list | | unit_of_measurement | Defines the units of measurement of the sensor | False | | string | | device_class | Sets the device_class for [sensors](https://www.home-assistant.io/integrations/sensor/) or [binary sensors](https://www.home-assistant.io/integrations/binary_sensor/) | False | | string | @@ -120,7 +117,6 @@ Configure the sensors that will scrape the data. | icon | Defines the icon or a template for the icon of the sensor. The value of the selector (or value_template when given) is provided as input for the template. For binary sensors, the value is parsed in a boolean. | False | | string/template | | picture | Contains a path to a local image and will set it as entity picture | False | | string | | force_update | Sends update events even if the value hasn’t changed. Useful if you want to have meaningful value graphs in history. | False | False | boolean | -| on_error | See [On-error](#on-error) | False | | | ### Refresh button @@ -135,18 +131,14 @@ Configure a refresh button to manually trigger scraping. Configure the attributes on the sensor that can be set with additional scraping values. 
-| name | description | required | default | type | -| -------------- | --------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | -| name | Name of the attribute (will be slugified) | True | | string | -| select | CSS selector used for retrieving the value of the attribute. Only required when `select_list` is not provided. | True | | string/template | -| select_list | CSS selector for multiple values of multiple elements which will be returned as csv. Only required when `select` is not provided. | True | | string/template | -| attribute | Attribute from the selected element to read as value | False | | string | -| value_template | Defines a template applied on the result of the selector to extract the value | False | | string/template | -| on_error | See [On-error](#on-error) | False | | | +| name | description | required | default | type | +| -------------- | --------------------------------------------- | -------- | ------- | --------------- | +| name | Name of the attribute (will be slugified) | True | | string | +| | Shared fields from the [Selector](#Selector). | True | | | ### Form-submit -Configure the form-submit functionality which enables you to submit a (login) form before scraping a site. More details on how this works [can be found on the wiki.](https://github.com/danieldotnl/ha-multiscrape/wiki/Form-submit-functionality) +Configure the form-submit functionality which enables you to submit a (login) form before scraping a site. More details on how this works [can be found on the wiki](https://github.com/danieldotnl/ha-multiscrape/wiki/Form-submit-functionality). 
| name | description | required | default | type | | ----------------- | --------------------------------------------------------------------------------------------------------- | -------- | ------- | ------------------- | @@ -156,6 +148,29 @@ Configure the form-submit functionality which enables you to submit a (login) fo | input_filter | A list of input fields that should not be submitted with the form | False | | string - list | | submit_once | Submit the form only once on startup instead of each scan interval | False | False | boolean | | resubmit_on_error | Resubmit the form after a scraping error is encountered | False | True | boolean | +| header_mappings | See [Header Mappings](#Header-Mappings) | False | | list | + +### Header Mappings + +Configure the headers you want to be forwarded from scraping the [Form-submit](#form-submit) page to scraping the main page for sensor data. A common use case is to populate the `X-Login-Token` header which is the result of the login. + +| name | description | required | default | type | +| -------------- | --------------------------------------------- | -------- | ------- | --------------- | +| name | Name of the header | True | | string | +| | Shared fields from the [Selector](#Selector). | True | | | + + +### Selector + +Shared field used in multiple configs above. Used to define the scraping: how to extract a value from the page. + +| name | description | required | default | type | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | +| select | CSS selector used for retrieving the value of the attribute. Only required when `select_list` or `value_template` is not provided. | False | | string/template | +| select_list | CSS selector for multiple values of multiple elements which will be returned as csv. 
Only required when `select` or `value_template` is not provided. | False | | string/template | +| attribute | Attribute from the selected element to read as value. | False | | string | +| value_template | Defines a template applied to extract the value from the result of the selector (if provided) or raw page (if selector not provided) | False | | string/template | +| on_error | See [On-error](#on-error) | False | | | ### On-error diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 4e6292a..90de20e 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -119,13 +119,21 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: http = create_http_wrapper(config_name, conf, hass, file_manager) + scraper = _create_scraper(config_name, conf, hass, file_manager) + form_submit_config = conf.get(CONF_FORM_SUBMIT) form_submitter = None if form_submit_config: form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) parser = conf.get(CONF_PARSER) - form_submitter = create_form_submitter( - config_name, form_submit_config, hass, form_http, file_manager, parser + form_submitter = _create_form_submitter( + config_name, + form_submit_config, + hass, + form_submit_http, + scraper, + file_manager, + parser, ) scraper = create_scraper(config_name, conf, hass, file_manager) diff --git a/custom_components/multiscrape/const.py b/custom_components/multiscrape/const.py index 87502d2..2c2e07a 100644 --- a/custom_components/multiscrape/const.py +++ b/custom_components/multiscrape/const.py @@ -31,6 +31,7 @@ CONF_FORM_INPUT_FILTER = "input_filter" CONF_FORM_SUBMIT_ONCE = "submit_once" CONF_FORM_RESUBMIT_ERROR = "resubmit_on_error" +CONF_FORM_HEADER_MAPPINGS = "header_mappings" CONF_LOG_RESPONSE = "log_response" DEFAULT_PARSER = "lxml" diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index 54e9a59..5dedfc9 
100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -60,6 +60,8 @@ def __init__( input_filter, submit_once, resubmit_error, + header_mapping_selectors, + scraper, parser, ): """Initialize FormSubmitter class.""" @@ -74,6 +76,8 @@ def __init__( self._input_filter = input_filter self._submit_once = submit_once self._resubmit_error = resubmit_error + self._header_mapping_selectors = header_mapping_selectors + self._scraper = scraper self._parser = parser self._should_submit = True @@ -150,11 +154,19 @@ async def async_submit(self, main_resource): if self._submit_once: self._should_submit = False + await self._scraper.set_content(response.text) + if not self._form_resource: return response.text else: return None + def scrape(self): + result = {} + for header_mapping_key in self._header_mapping_selectors: + result[header_mapping_key] = self._scraper.scrape(self._header_mapping_selectors[header_mapping_key], header_mapping_key) + return result + def _determine_submit_resource(self, action, main_resource): resource = main_resource if action and self._form_resource: diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index efe3005..1d62e61 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -77,6 +77,7 @@ def __init__( self._params_renderer = params_renderer self._headers_renderer = headers_renderer self._data_renderer = data_renderer + self._form_headers = None def set_authentication(self, username, password, auth_type): """Set http authentication.""" @@ -86,11 +87,16 @@ def set_authentication(self, username, password, auth_type): self._auth = (username, password) _LOGGER.debug("%s # Authentication configuration processed", self._config_name) + def set_form_headers(self, form_headers): + self._form_headers = form_headers + async def async_request(self, context, resource, method=None, request_data=None): """Execute a HTTP request.""" data = 
request_data or self._data_renderer() method = method or self._method or "GET" headers = self._headers_renderer(None) + if self._form_headers: + headers.update(self._form_headers) params = self._params_renderer(None) _LOGGER.debug( diff --git a/custom_components/multiscrape/schema.py b/custom_components/multiscrape/schema.py index fe8f01e..431a500 100644 --- a/custom_components/multiscrape/schema.py +++ b/custom_components/multiscrape/schema.py @@ -43,6 +43,7 @@ from .const import CONF_FORM_SELECT from .const import CONF_FORM_SUBMIT from .const import CONF_FORM_SUBMIT_ONCE +from .const import CONF_FORM_HEADER_MAPPINGS from .const import CONF_LOG_RESPONSE from .const import CONF_ON_ERROR from .const import CONF_ON_ERROR_DEFAULT @@ -72,6 +73,8 @@ from .const import METHODS from .scraper import DEFAULT_TIMEOUT +INTEGRATION_SCHEMA = { + _LOGGER = logging.getLogger(__name__) HTTP_SCHEMA = { @@ -128,6 +131,20 @@ vol.Optional(CONF_ON_ERROR): vol.Schema(ON_ERROR_SCHEMA), } +FORM_HEADERS_MAPPING_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} + +FORM_SUBMIT_SCHEMA = { + vol.Optional(CONF_FORM_RESOURCE): cv.string, + vol.Optional(CONF_FORM_SELECT): cv.string, + vol.Optional(CONF_FORM_INPUT): vol.Schema({cv.string: cv.string}), + vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, + vol.Optional(CONF_FORM_SUBMIT_ONCE, default=False): cv.boolean, + vol.Optional(CONF_FORM_RESUBMIT_ERROR, default=True): cv.boolean, + vol.Optional(CONF_FORM_HEADER_MAPPINGS, default=[]): vol.All( + cv.ensure_list, [vol.Schema(FORM_HEADERS_MAPPING_SCHEMA)] + ), +} + SENSOR_ATTRIBUTE_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} SENSOR_SCHEMA = { From 5abc00b0031eeb28f8add60572d0c47d3cbcddc9 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sat, 6 Apr 2024 08:04:08 +0000 Subject: [PATCH 02/32] init fixups --- custom_components/multiscrape/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 90de20e..d146102 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -119,18 +119,18 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: http = create_http_wrapper(config_name, conf, hass, file_manager) - scraper = _create_scraper(config_name, conf, hass, file_manager) + scraper = create_scraper(config_name, conf, hass, file_manager) form_submit_config = conf.get(CONF_FORM_SUBMIT) form_submitter = None if form_submit_config: form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) parser = conf.get(CONF_PARSER) - form_submitter = _create_form_submitter( + form_submitter = create_form_submitter( config_name, form_submit_config, hass, - form_submit_http, + form_http, scraper, file_manager, parser, From 355750b57ca5ccc90dc8b8d56d5d618f2afef128 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sat, 6 Apr 2024 08:50:48 +0000 Subject: [PATCH 03/32] init -> form refactor fixup --- custom_components/multiscrape/form.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index 5dedfc9..e59bd39 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -2,6 +2,10 @@ import logging from urllib.parse import urljoin +from homeassistant.const import CONF_NAME +from .const import CONF_FORM_HEADER_MAPPINGS + + from bs4 import BeautifulSoup from homeassistant.core import HomeAssistant @@ -16,12 +20,13 @@ ) from .file import LoggingFileManager from .http import HttpWrapper +from .selector import Selector _LOGGER = logging.getLogger(__name__) -def create_form_submitter(config_name, config, hass, http, file_manager, parser): +def create_form_submitter(config_name, config, hass, http, scraper, file_manager, parser): """Create a form submitter 
instance.""" resource = config.get(CONF_RESOURCE) select = config.get(CONF_FORM_SELECT) @@ -29,6 +34,9 @@ def create_form_submitter(config_name, config, hass, http, file_manager, parser) input_filter = config.get(CONF_FORM_INPUT_FILTER) resubmit_error = config.get(CONF_FORM_RESUBMIT_ERROR) submit_once = config.get(CONF_FORM_SUBMIT_ONCE) + header_mapping_selectors = {} + for header_mapping_conf in config.get(CONF_FORM_HEADER_MAPPINGS): + header_mapping_selectors[header_mapping_conf.get(CONF_NAME)] = Selector(hass, header_mapping_conf) return FormSubmitter( config_name, @@ -41,6 +49,8 @@ def create_form_submitter(config_name, config, hass, http, file_manager, parser) input_filter, submit_once, resubmit_error, + header_mapping_selectors, + scraper, parser, ) From 776fc1db78e3b6fc6c82f9297849c7db0718e10a Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sat, 6 Apr 2024 09:00:10 +0000 Subject: [PATCH 04/32] coordinator fixup --- custom_components/multiscrape/coordinator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 4f3008a..97207c1 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -69,6 +69,8 @@ async def get_content(self) -> str: if self._form_submitter: try: result = await self._form_submitter.async_submit(resource) + form_headers = self._form_submitter.scrape() + self._http.set_form_headers(form_headers) if result: _LOGGER.debug( From a07ef508ef2e8c3092d4a8208c68786317e4f20f Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sat, 6 Apr 2024 09:06:43 +0000 Subject: [PATCH 05/32] schema fixup --- custom_components/multiscrape/schema.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/custom_components/multiscrape/schema.py b/custom_components/multiscrape/schema.py index 431a500..fdd0260 100644 --- a/custom_components/multiscrape/schema.py +++ 
b/custom_components/multiscrape/schema.py @@ -73,8 +73,6 @@ from .const import METHODS from .scraper import DEFAULT_TIMEOUT -INTEGRATION_SCHEMA = { - _LOGGER = logging.getLogger(__name__) HTTP_SCHEMA = { @@ -93,15 +91,6 @@ vol.Optional(CONF_TIMEOUT, default=DEFAULT_TIMEOUT): cv.positive_int, } -FORM_SUBMIT_SCHEMA = { - **HTTP_SCHEMA, - vol.Optional(CONF_FORM_SELECT): cv.string, - vol.Optional(CONF_FORM_INPUT): vol.Schema({cv.string: cv.string}), - vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, - vol.Optional(CONF_FORM_SUBMIT_ONCE, default=False): cv.boolean, - vol.Optional(CONF_FORM_RESUBMIT_ERROR, default=True): cv.boolean, -} - INTEGRATION_SCHEMA = { **HTTP_SCHEMA, vol.Optional(CONF_PARSER, default=DEFAULT_PARSER): cv.string, @@ -134,7 +123,7 @@ FORM_HEADERS_MAPPING_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} FORM_SUBMIT_SCHEMA = { - vol.Optional(CONF_FORM_RESOURCE): cv.string, + **HTTP_SCHEMA, vol.Optional(CONF_FORM_SELECT): cv.string, vol.Optional(CONF_FORM_INPUT): vol.Schema({cv.string: cv.string}), vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, From 25e954a0378b5aff30edea72de26b29d8cc0afec Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sat, 6 Apr 2024 11:42:04 +0000 Subject: [PATCH 06/32] Adding docs for public functions. 
--- custom_components/multiscrape/coordinator.py | 2 +- custom_components/multiscrape/form.py | 3 ++- custom_components/multiscrape/http.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 97207c1..91b55a9 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -69,7 +69,7 @@ async def get_content(self) -> str: if self._form_submitter: try: result = await self._form_submitter.async_submit(resource) - form_headers = self._form_submitter.scrape() + form_headers = self._form_submitter.scrape_header_mappings() self._http.set_form_headers(form_headers) if result: diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index e59bd39..eae99c0 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -171,7 +171,8 @@ async def async_submit(self, main_resource): else: return None - def scrape(self): + def scrape_header_mappings(self): + """Scrape header mappings.""" result = {} for header_mapping_key in self._header_mapping_selectors: result[header_mapping_key] = self._scraper.scrape(self._header_mapping_selectors[header_mapping_key], header_mapping_key) diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index 1d62e61..a396b9f 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -88,6 +88,7 @@ def set_authentication(self, username, password, auth_type): _LOGGER.debug("%s # Authentication configuration processed", self._config_name) def set_form_headers(self, form_headers): + """Set form headers.""" self._form_headers = form_headers async def async_request(self, context, resource, method=None, request_data=None): From cb99aa70fd45de2fa2b0d8eb29161455da594050 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sun, 26 May 2024 06:39:04 +0000 Subject: 
[PATCH 07/32] header_mapping -> variables initial rename, no functionality change. Format trggered automatically. --- custom_components/multiscrape/const.py | 2 +- custom_components/multiscrape/coordinator.py | 24 ++--- custom_components/multiscrape/form.py | 38 +++---- custom_components/multiscrape/http.py | 31 +++--- custom_components/multiscrape/schema.py | 104 +++++++------------ 5 files changed, 72 insertions(+), 127 deletions(-) diff --git a/custom_components/multiscrape/const.py b/custom_components/multiscrape/const.py index 2c2e07a..ef87737 100644 --- a/custom_components/multiscrape/const.py +++ b/custom_components/multiscrape/const.py @@ -31,7 +31,7 @@ CONF_FORM_INPUT_FILTER = "input_filter" CONF_FORM_SUBMIT_ONCE = "submit_once" CONF_FORM_RESUBMIT_ERROR = "resubmit_on_error" -CONF_FORM_HEADER_MAPPINGS = "header_mappings" +CONF_FORM_VARIABLES = "variables" CONF_LOG_RESPONSE = "log_response" DEFAULT_PARSER = "lxml" diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 91b55a9..38a7185 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -1,26 +1,22 @@ """Coordinator class for multiscrape integration.""" import logging -from datetime import timedelta from collections.abc import Callable +from datetime import timedelta +from homeassistant.const import (CONF_RESOURCE, CONF_RESOURCE_TEMPLATE, + CONF_SCAN_INTERVAL) from homeassistant.core import HomeAssistant -from homeassistant.const import ( - CONF_RESOURCE, - CONF_RESOURCE_TEMPLATE, - CONF_SCAN_INTERVAL, -) -from homeassistant.helpers.update_coordinator import DataUpdateCoordinator -from homeassistant.helpers.update_coordinator import event +from homeassistant.helpers.update_coordinator import (DataUpdateCoordinator, + event) from homeassistant.util.dt import utcnow -from .scraper import Scraper -from .http import HttpWrapper +from .const import DOMAIN from .file import LoggingFileManager from 
.form import FormSubmitter +from .http import HttpWrapper +from .scraper import Scraper from .util import create_renderer -from .const import DOMAIN - _LOGGER = logging.getLogger(__name__) # we don't want to go with the default 15 seconds defined in helpers/entity_component DEFAULT_SCAN_INTERVAL = timedelta(seconds=60) @@ -69,8 +65,8 @@ async def get_content(self) -> str: if self._form_submitter: try: result = await self._form_submitter.async_submit(resource) - form_headers = self._form_submitter.scrape_header_mappings() - self._http.set_form_headers(form_headers) + form_variables = self._form_submitter.scrape_variables() + self._http.set_variables(form_variables) if result: _LOGGER.debug( diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index eae99c0..daf2443 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -2,27 +2,17 @@ import logging from urllib.parse import urljoin -from homeassistant.const import CONF_NAME -from .const import CONF_FORM_HEADER_MAPPINGS - - from bs4 import BeautifulSoup - +from homeassistant.const import CONF_NAME, CONF_RESOURCE from homeassistant.core import HomeAssistant -from homeassistant.const import CONF_RESOURCE - -from .const import ( - CONF_FORM_SELECT, - CONF_FORM_INPUT, - CONF_FORM_INPUT_FILTER, - CONF_FORM_SUBMIT_ONCE, - CONF_FORM_RESUBMIT_ERROR, -) + +from .const import (CONF_FORM_INPUT, CONF_FORM_INPUT_FILTER, + CONF_FORM_RESUBMIT_ERROR, CONF_FORM_SELECT, + CONF_FORM_SUBMIT_ONCE, CONF_FORM_VARIABLES) from .file import LoggingFileManager from .http import HttpWrapper from .selector import Selector - _LOGGER = logging.getLogger(__name__) @@ -34,9 +24,9 @@ def create_form_submitter(config_name, config, hass, http, scraper, file_manager input_filter = config.get(CONF_FORM_INPUT_FILTER) resubmit_error = config.get(CONF_FORM_RESUBMIT_ERROR) submit_once = config.get(CONF_FORM_SUBMIT_ONCE) - header_mapping_selectors = {} - for header_mapping_conf in 
config.get(CONF_FORM_HEADER_MAPPINGS): - header_mapping_selectors[header_mapping_conf.get(CONF_NAME)] = Selector(hass, header_mapping_conf) + variables_selectors = {} + for variables_conf in config.get(CONF_FORM_VARIABLES): + variables_selectors[variables_conf.get(CONF_NAME)] = Selector(hass, variables_conf) return FormSubmitter( config_name, @@ -49,7 +39,7 @@ def create_form_submitter(config_name, config, hass, http, scraper, file_manager input_filter, submit_once, resubmit_error, - header_mapping_selectors, + variables_selectors, scraper, parser, ) @@ -70,7 +60,7 @@ def __init__( input_filter, submit_once, resubmit_error, - header_mapping_selectors, + variables_selectors, scraper, parser, ): @@ -86,7 +76,7 @@ def __init__( self._input_filter = input_filter self._submit_once = submit_once self._resubmit_error = resubmit_error - self._header_mapping_selectors = header_mapping_selectors + self._variables_selectors = variables_selectors self._scraper = scraper self._parser = parser self._should_submit = True @@ -171,11 +161,11 @@ async def async_submit(self, main_resource): else: return None - def scrape_header_mappings(self): + def scrape_variables(self): """Scrape header mappings.""" result = {} - for header_mapping_key in self._header_mapping_selectors: - result[header_mapping_key] = self._scraper.scrape(self._header_mapping_selectors[header_mapping_key], header_mapping_key) + for variable_key in self._variables_selectors: + result[variable_key] = self._scraper.scrape(self._variables_selectors[variable_key], variable_key) return result def _determine_submit_resource(self, action, main_resource): diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index a396b9f..5c92537 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -1,21 +1,14 @@ """HTTP request related functionality.""" import logging from collections.abc import Callable -import httpx +import httpx +from homeassistant.const 
import (CONF_AUTHENTICATION, CONF_HEADERS, + CONF_METHOD, CONF_PARAMS, CONF_PASSWORD, + CONF_PAYLOAD, CONF_TIMEOUT, CONF_USERNAME, + CONF_VERIFY_SSL, HTTP_DIGEST_AUTHENTICATION) from homeassistant.helpers.httpx_client import get_async_client -from homeassistant.const import ( - HTTP_DIGEST_AUTHENTICATION, - CONF_VERIFY_SSL, - CONF_USERNAME, - CONF_PASSWORD, - CONF_AUTHENTICATION, - CONF_TIMEOUT, - CONF_HEADERS, - CONF_PARAMS, - CONF_PAYLOAD, - CONF_METHOD, -) + from .util import create_dict_renderer, create_renderer _LOGGER = logging.getLogger(__name__) @@ -77,7 +70,7 @@ def __init__( self._params_renderer = params_renderer self._headers_renderer = headers_renderer self._data_renderer = data_renderer - self._form_headers = None + self._form_variables = None def set_authentication(self, username, password, auth_type): """Set http authentication.""" @@ -87,17 +80,17 @@ def set_authentication(self, username, password, auth_type): self._auth = (username, password) _LOGGER.debug("%s # Authentication configuration processed", self._config_name) - def set_form_headers(self, form_headers): - """Set form headers.""" - self._form_headers = form_headers + def set_variables(self, form_variables): + """Set form variables.""" + self._form_variables = form_variables async def async_request(self, context, resource, method=None, request_data=None): """Execute a HTTP request.""" data = request_data or self._data_renderer() method = method or self._method or "GET" headers = self._headers_renderer(None) - if self._form_headers: - headers.update(self._form_headers) + if self._form_variables: + headers.update(self._form_variables) params = self._params_renderer(None) _LOGGER.debug( diff --git a/custom_components/multiscrape/schema.py b/custom_components/multiscrape/schema.py index fdd0260..0263d97 100644 --- a/custom_components/multiscrape/schema.py +++ b/custom_components/multiscrape/schema.py @@ -1,76 +1,42 @@ """The multiscrape component schemas.""" +import logging + import 
homeassistant.helpers.config_validation as cv import voluptuous as vol -import logging -from homeassistant.components.binary_sensor import ( - DEVICE_CLASSES_SCHEMA as BINARY_SENSOR_DEVICE_CLASSES_SCHEMA, -) -from homeassistant.components.binary_sensor import DOMAIN as BINARY_SENSOR_DOMAIN +from homeassistant.components.binary_sensor import \ + DEVICE_CLASSES_SCHEMA as BINARY_SENSOR_DEVICE_CLASSES_SCHEMA +from homeassistant.components.binary_sensor import \ + DOMAIN as BINARY_SENSOR_DOMAIN from homeassistant.components.button import DOMAIN as BUTTON_DOMAIN -from homeassistant.components.sensor import ( - DEVICE_CLASSES_SCHEMA as SENSOR_DEVICE_CLASSES_SCHEMA, -) +from homeassistant.components.sensor import \ + DEVICE_CLASSES_SCHEMA as SENSOR_DEVICE_CLASSES_SCHEMA from homeassistant.components.sensor import DOMAIN as SENSOR_DOMAIN -from homeassistant.components.sensor import ( - STATE_CLASSES_SCHEMA as SENSOR_STATE_CLASSES_SCHEMA, -) -from homeassistant.const import CONF_AUTHENTICATION -from homeassistant.const import CONF_DEVICE_CLASS -from homeassistant.const import CONF_FORCE_UPDATE -from homeassistant.const import CONF_HEADERS -from homeassistant.const import CONF_ICON -from homeassistant.const import CONF_METHOD -from homeassistant.const import CONF_NAME -from homeassistant.const import CONF_PARAMS -from homeassistant.const import CONF_PASSWORD -from homeassistant.const import CONF_PAYLOAD -from homeassistant.const import CONF_RESOURCE -from homeassistant.const import CONF_RESOURCE_TEMPLATE -from homeassistant.const import CONF_SCAN_INTERVAL -from homeassistant.const import CONF_TIMEOUT -from homeassistant.const import CONF_UNIQUE_ID -from homeassistant.const import CONF_UNIT_OF_MEASUREMENT -from homeassistant.const import CONF_USERNAME -from homeassistant.const import CONF_VALUE_TEMPLATE -from homeassistant.const import CONF_VERIFY_SSL -from homeassistant.const import HTTP_BASIC_AUTHENTICATION -from homeassistant.const import HTTP_DIGEST_AUTHENTICATION - -from 
.const import CONF_ATTR -from .const import CONF_FORM_INPUT -from .const import CONF_FORM_INPUT_FILTER -from .const import CONF_FORM_RESUBMIT_ERROR -from .const import CONF_FORM_SELECT -from .const import CONF_FORM_SUBMIT -from .const import CONF_FORM_SUBMIT_ONCE -from .const import CONF_FORM_HEADER_MAPPINGS -from .const import CONF_LOG_RESPONSE -from .const import CONF_ON_ERROR -from .const import CONF_ON_ERROR_DEFAULT -from .const import CONF_ON_ERROR_LOG -from .const import CONF_ON_ERROR_VALUE -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import CONF_PARSER -from .const import CONF_PICTURE -from .const import CONF_SELECT -from .const import CONF_SELECT_LIST -from .const import CONF_SENSOR_ATTRS -from .const import CONF_SEPARATOR -from .const import CONF_STATE_CLASS -from .const import DEFAULT_BINARY_SENSOR_NAME -from .const import DEFAULT_BUTTON_NAME -from .const import DEFAULT_FORCE_UPDATE -from .const import DEFAULT_METHOD -from .const import DEFAULT_PARSER -from .const import DEFAULT_SENSOR_NAME -from .const import DEFAULT_SEPARATOR -from .const import DEFAULT_VERIFY_SSL -from .const import DOMAIN -from .const import LOG_ERROR -from .const import LOG_LEVELS -from .const import METHODS +from homeassistant.components.sensor import \ + STATE_CLASSES_SCHEMA as SENSOR_STATE_CLASSES_SCHEMA +from homeassistant.const import (CONF_AUTHENTICATION, CONF_DEVICE_CLASS, + CONF_FORCE_UPDATE, CONF_HEADERS, CONF_ICON, + CONF_METHOD, CONF_NAME, CONF_PARAMS, + CONF_PASSWORD, CONF_PAYLOAD, CONF_RESOURCE, + CONF_RESOURCE_TEMPLATE, CONF_SCAN_INTERVAL, + CONF_TIMEOUT, CONF_UNIQUE_ID, + CONF_UNIT_OF_MEASUREMENT, CONF_USERNAME, + CONF_VALUE_TEMPLATE, CONF_VERIFY_SSL, + HTTP_BASIC_AUTHENTICATION, + HTTP_DIGEST_AUTHENTICATION) + +from .const import (CONF_ATTR, CONF_FORM_INPUT, CONF_FORM_INPUT_FILTER, + CONF_FORM_RESUBMIT_ERROR, CONF_FORM_SELECT, + CONF_FORM_SUBMIT, 
CONF_FORM_SUBMIT_ONCE, + CONF_FORM_VARIABLES, CONF_LOG_RESPONSE, CONF_ON_ERROR, + CONF_ON_ERROR_DEFAULT, CONF_ON_ERROR_LOG, + CONF_ON_ERROR_VALUE, CONF_ON_ERROR_VALUE_DEFAULT, + CONF_ON_ERROR_VALUE_LAST, CONF_ON_ERROR_VALUE_NONE, + CONF_PARSER, CONF_PICTURE, CONF_SELECT, CONF_SELECT_LIST, + CONF_SENSOR_ATTRS, CONF_SEPARATOR, CONF_STATE_CLASS, + DEFAULT_BINARY_SENSOR_NAME, DEFAULT_BUTTON_NAME, + DEFAULT_FORCE_UPDATE, DEFAULT_METHOD, DEFAULT_PARSER, + DEFAULT_SENSOR_NAME, DEFAULT_SEPARATOR, DEFAULT_VERIFY_SSL, + DOMAIN, LOG_ERROR, LOG_LEVELS, METHODS) from .scraper import DEFAULT_TIMEOUT _LOGGER = logging.getLogger(__name__) @@ -129,7 +95,7 @@ vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, vol.Optional(CONF_FORM_SUBMIT_ONCE, default=False): cv.boolean, vol.Optional(CONF_FORM_RESUBMIT_ERROR, default=True): cv.boolean, - vol.Optional(CONF_FORM_HEADER_MAPPINGS, default=[]): vol.All( + vol.Optional(CONF_FORM_VARIABLES, default=[]): vol.All( cv.ensure_list, [vol.Schema(FORM_HEADERS_MAPPING_SCHEMA)] ), } From 186a93f12b2ecae64818dbf6e8962aee340b5619 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sun, 26 May 2024 23:43:25 +0000 Subject: [PATCH 08/32] rearranging --- custom_components/multiscrape/__init__.py | 48 ++++++++--------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index d146102..31b796a 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -6,38 +6,27 @@ import voluptuous as vol from homeassistant.config_entries import ConfigEntry -from homeassistant.const import CONF_NAME - -from homeassistant.const import Platform -from homeassistant.const import SERVICE_RELOAD, CONF_RESOURCE, CONF_RESOURCE_TEMPLATE +from homeassistant.const import (CONF_NAME, CONF_RESOURCE, + CONF_RESOURCE_TEMPLATE, SERVICE_RELOAD, + Platform) from homeassistant.core import HomeAssistant - from 
homeassistant.exceptions import HomeAssistantError from homeassistant.helpers import discovery -from homeassistant.helpers.reload import async_integration_yaml_config -from homeassistant.helpers.reload import async_reload_integration_platforms +from homeassistant.helpers.reload import (async_integration_yaml_config, + async_reload_integration_platforms) from homeassistant.util import slugify -from .service import setup_config_services, setup_integration_services - -from .const import CONF_FORM_SUBMIT -from .const import CONF_LOG_RESPONSE -from .const import CONF_PARSER -from .const import COORDINATOR -from .const import DOMAIN -from .const import PLATFORM_IDX -from .const import SCRAPER -from .const import SCRAPER_DATA -from .const import SCRAPER_IDX -from .coordinator import ( - create_multiscrape_coordinator, -) -from .coordinator import create_content_request_manager +from .const import (CONF_FORM_SUBMIT, CONF_LOG_RESPONSE, CONF_PARSER, + COORDINATOR, DOMAIN, PLATFORM_IDX, SCRAPER, SCRAPER_DATA, + SCRAPER_IDX) +from .coordinator import (create_content_request_manager, + create_multiscrape_coordinator) from .file import LoggingFileManager from .form import create_form_submitter from .http import create_http_wrapper from .schema import COMBINED_SCHEMA, CONFIG_SCHEMA # noqa: F401 from .scraper import create_scraper +from .service import setup_config_services, setup_integration_services _LOGGER = logging.getLogger(__name__) PLATFORMS = [Platform.SENSOR, Platform.BINARY_SENSOR, Platform.BUTTON] @@ -117,30 +106,25 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: file_manager = LoggingFileManager(folder) await hass.async_add_executor_job(file_manager.create_folders) - http = create_http_wrapper(config_name, conf, hass, file_manager) - - scraper = create_scraper(config_name, conf, hass, file_manager) - form_submit_config = conf.get(CONF_FORM_SUBMIT) form_submitter = None if form_submit_config: - form_http = create_http_wrapper(config_name, 
form_submit_config, hass, file_manager) parser = conf.get(CONF_PARSER) + form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) + form_scraper = create_scraper(config_name, conf, hass, file_manager) form_submitter = create_form_submitter( config_name, form_submit_config, hass, form_http, - scraper, + form_scraper, file_manager, parser, ) + http = create_http_wrapper(config_name, conf, hass, file_manager) scraper = create_scraper(config_name, conf, hass, file_manager) - - request_manager = create_content_request_manager( - config_name, conf, hass, http, form_submitter - ) + request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter) coordinator = create_multiscrape_coordinator( config_name, conf, From d7b2b9483928f3ff7b9577039b31e18a39e7c321 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Sun, 26 May 2024 23:56:28 +0000 Subject: [PATCH 09/32] transitioning from header mappings to variables. --- custom_components/multiscrape/__init__.py | 2 +- custom_components/multiscrape/coordinator.py | 7 +++++-- custom_components/multiscrape/http.py | 16 +++++++--------- custom_components/multiscrape/scraper.py | 9 +++++++-- custom_components/multiscrape/util.py | 13 ++++++------- 5 files changed, 26 insertions(+), 21 deletions(-) diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 31b796a..82f32c1 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -124,7 +124,7 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: http = create_http_wrapper(config_name, conf, hass, file_manager) scraper = create_scraper(config_name, conf, hass, file_manager) - request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter) + request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter, scraper) coordinator = 
create_multiscrape_coordinator( config_name, conf, diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 38a7185..34ad895 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -23,7 +23,7 @@ def create_content_request_manager( - config_name, config, hass: HomeAssistant, http, form_submitter + config_name, config, hass: HomeAssistant, http, form_submitter, scraper ): """Create a content request manager instance.""" _LOGGER.debug("%s # Creating ContentRequestManager", config_name) @@ -34,7 +34,7 @@ def create_content_request_manager( resource_renderer = create_renderer(hass, resource_template) else: resource_renderer = create_renderer(hass, resource) - return ContentRequestManager(config_name, http, resource_renderer, form_submitter) + return ContentRequestManager(config_name, http, resource_renderer, form_submitter, scraper) class ContentRequestManager: @@ -46,12 +46,14 @@ def __init__( http: HttpWrapper, resource_renderer: Callable, form: FormSubmitter = None, + scraper: Scraper = None, ) -> None: """Initialize ContentRequestManager.""" self._config_name = config_name self._http = http self._form_submitter = form self._resource_renderer = resource_renderer + self._scraper = scraper def notify_scrape_exception(self): """Notify the form_submitter of an exception so it will re-submit next trigger.""" @@ -67,6 +69,7 @@ async def get_content(self) -> str: result = await self._form_submitter.async_submit(resource) form_variables = self._form_submitter.scrape_variables() self._http.set_variables(form_variables) + self._scraper.set_variables(form_variables) if result: _LOGGER.debug( diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index 5c92537..facdccd 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -70,7 +70,7 @@ def __init__( self._params_renderer = params_renderer 
self._headers_renderer = headers_renderer self._data_renderer = data_renderer - self._form_variables = None + self._variables = None def set_authentication(self, username, password, auth_type): """Set http authentication.""" @@ -80,18 +80,16 @@ def set_authentication(self, username, password, auth_type): self._auth = (username, password) _LOGGER.debug("%s # Authentication configuration processed", self._config_name) - def set_variables(self, form_variables): - """Set form variables.""" - self._form_variables = form_variables + def set_variables(self, variables): + """Set variables.""" + self._variables = variables async def async_request(self, context, resource, method=None, request_data=None): """Execute a HTTP request.""" - data = request_data or self._data_renderer() + data = request_data or self._data_renderer(None, False, self._variables) method = method or self._method or "GET" - headers = self._headers_renderer(None) - if self._form_variables: - headers.update(self._form_variables) - params = self._params_renderer(None) + headers = self._headers_renderer(None, False, self._variables) + params = self._params_renderer(None, False, self._variables) _LOGGER.debug( "%s # Executing %s-request with a %s to url: %s with headers: %s.", diff --git a/custom_components/multiscrape/scraper.py b/custom_components/multiscrape/scraper.py index 1517c87..5b9b4b1 100644 --- a/custom_components/multiscrape/scraper.py +++ b/custom_components/multiscrape/scraper.py @@ -45,6 +45,7 @@ def __init__( self._soup: BeautifulSoup = None self._data = None self._separator = separator + self._variables = {} self.reset() @property @@ -95,6 +96,10 @@ async def set_content(self, content): ) raise + def set_variables(self, variables): + """Set variables.""" + self._variables = variables + def scrape(self, selector, sensor, attribute=None): """Scrape based on given selector the data.""" # This is required as this function is called separately for sensors and attributes @@ -105,7 +110,7 @@ def 
scrape(self, selector, sensor, attribute=None): if selector.just_value: _LOGGER.debug("%s # Applying value_template only.", log_prefix) result = selector.value_template.async_render_with_possible_json_value( - self._data, None + self._data, None, variables=self._variables ) return selector.value_template._parse_result(result) @@ -150,7 +155,7 @@ def scrape(self, selector, sensor, attribute=None): if value is not None and selector.value_template is not None: _LOGGER.debug("%s # Applying value_template on selector result", log_prefix) value = selector.value_template.async_render( - variables={"value": value}, parse_result=True + variables={"value": value} | self._variables, parse_result=True ) _LOGGER.debug( diff --git a/custom_components/multiscrape/util.py b/custom_components/multiscrape/util.py index 5df0f3c..81ed200 100644 --- a/custom_components/multiscrape/util.py +++ b/custom_components/multiscrape/util.py @@ -5,23 +5,22 @@ from homeassistant.exceptions import TemplateError from homeassistant.helpers.template import Template - _LOGGER: logging.Logger = logging.getLogger(__name__) def create_renderer(hass, value_template): """Create a template renderer based on value_template.""" if value_template is None: - return lambda value=None, parse_result=None: value + return lambda value=None, parse_result=None, variables={}: value if not isinstance(value_template, Template): value_template = Template(value_template, hass) else: value_template.hass = hass - def _render(value: Any = None, parse_result=False): + def _render(value: Any = None, parse_result=False, variables: dict = {}): try: - return value_template.async_render({"value": value}, parse_result) + return value_template.async_render({"value": value} | variables, parse_result) except TemplateError: _LOGGER.exception( "Error rendering template: %s with value %s", value_template, value @@ -34,16 +33,16 @@ def _render(value: Any = None, parse_result=False): def create_dict_renderer(hass, templates_dict): """Create 
template renderers for a dictionary with value_templates.""" if templates_dict is None: - return lambda value=None, parse_result=None: {} + return lambda value=None, parse_result=None, variables={}: {} # Create a copy of the templates_dict to avoid modification of the original templates_dict = templates_dict.copy() for item in templates_dict: templates_dict[item] = create_renderer(hass, templates_dict[item]) - def _render(value: Any = None, parse_result=False): + def _render(value: Any = None, parse_result=False, variables: dict = {}): return { - item: templates_dict[item](value, parse_result) for item in templates_dict + item: templates_dict[item](value, parse_result, variables) for item in templates_dict } return _render From 852e95ea80b4c22bcc7035f5873feff134255b63 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Mon, 27 May 2024 00:04:09 +0000 Subject: [PATCH 10/32] Updating README --- README.md | 45 +++++++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 334dde3..d06fe84 100644 --- a/README.md +++ b/README.md @@ -131,10 +131,10 @@ Configure a refresh button to manually trigger scraping. Configure the attributes on the sensor that can be set with additional scraping values. -| name | description | required | default | type | -| -------------- | --------------------------------------------- | -------- | ------- | --------------- | -| name | Name of the attribute (will be slugified) | True | | string | -| | Shared fields from the [Selector](#Selector). | True | | | +| name | description | required | default | type | +| ---- | --------------------------------------------- | -------- | ------- | ------ | +| name | Name of the attribute (will be slugified) | True | | string | +| | Shared fields from the [Selector](#Selector). 
| True | | | ### Form-submit @@ -148,17 +148,38 @@ Configure the form-submit functionality which enables you to submit a (login) fo | input_filter | A list of input fields that should not be submitted with the form | False | | string - list | | submit_once | Submit the form only once on startup instead of each scan interval | False | False | boolean | | resubmit_on_error | Resubmit the form after a scraping error is encountered | False | True | boolean | -| header_mappings | See [Header Mappings](#Header-Mappings) | False | | list | +| variables | See [Form Variables](#Form-Variables) | False | | list | -### Header Mappings +### Form Variables -Configure the headers you want to be forwarded from scraping the [Form-submit](#form-submit) page to scraping the main page for sensor data. A common use case is to populate the `X-Login-Token` header which is the result of the login. +Configure the variables you want to be forwarded from scraping the [Form-submit](#form-submit) page to scraping the main page for sensor data. A common use case is to populate the `X-Login-Token` header which is the result of the login. -| name | description | required | default | type | -| -------------- | --------------------------------------------- | -------- | ------- | --------------- | -| name | Name of the header | True | | string | -| | Shared fields from the [Selector](#Selector). | True | | | +| name | description | required | default | type | +| ---- | --------------------------------------------- | -------- | ------- | ------ | +| name | Name of the variable | True | | string | +| | Shared fields from the [Selector](#Selector). 
| True | | | +Example: + +```yaml +multiscrape: + - resource: "https://website-api.airvisual.com/v1/users/65a28a0cec1ff309a74ba414/devices/avo_65a2bd77c2f7aeabcd715393?units.system=metric&AQI=US&language=en" + form_submit: + submit_once: True + resource: "https://website-api.airvisual.com/v1/auth/signin/by/email" + input: + email: "" + password: "" + variables: + - name: token + value_template: "{{ (value | from_json).loginToken }}" + headers: + X-Login-Token: "{{ token }}" + sensor: + - name: AirVisual Outdoor AQI + value_template: "{{ (value | from_json).current.aqi.value }}" + unit_of_measurement: "AQI US" +``` ### Selector @@ -191,7 +212,7 @@ Multiscrape also offers a `get_content` and a `scrape` service. `get_content` re `scrape` does what it says. It scrapes a website and provides the sensors and attributes. Both services accept the same configuration as what you would provide in your configuration yaml (what is described above), with a small but important caveat: if the service input contains templates, those are automatically parsed by home assistant when the service is being called. That is fine for templates like `resource` and `select`, but templates that need to be applied on the scraped data itself (like `value_template`), cannot be parsed when the service is called. Therefore you need to slightly alter the syntax and add a `!` in the middle. E.g. `{{` becomes `{!{` and `%}` becomes `%!}`. Multiscrape will then understand that this string needs to handled as a template after the service has been called.\ -*If someone has a better solution, please let me know!* +_If someone has a better solution, please let me know!_ To call one of those services, go to 'Developer tools' in Home Assistant and then to 'services'. Find the `multiscrape.get_content` or `multiscrape.scrape` services and go to yaml mode. There you enter your configuration. 
Example: From 06f8164a8ac5e098408001052deccee4d77d3829 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sat, 8 Jun 2024 13:27:31 +0000 Subject: [PATCH 11/32] README feedback --- README.md | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index d06fe84..ba20d7b 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ Configure the sensors that will scrape the data. | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | | unique_id | Will be used as entity_id and enables editing the entity in the UI | False | | string | | name | Friendly name for the sensor | False | | string | -| | Shared fields from the [Selector](#Selector). | True | | | +| | See [Selector](#Selector) fields | True | | | | attributes | See [Sensor attributes](#sensor-attributes) | False | | list | | unit_of_measurement | Defines the units of measurement of the sensor | False | | string | | device_class | Sets the device_class for [sensors](https://www.home-assistant.io/integrations/sensor/) or [binary sensors](https://www.home-assistant.io/integrations/binary_sensor/) | False | | string | @@ -131,10 +131,10 @@ Configure a refresh button to manually trigger scraping. Configure the attributes on the sensor that can be set with additional scraping values. -| name | description | required | default | type | -| ---- | --------------------------------------------- | -------- | ------- | ------ | -| name | Name of the attribute (will be slugified) | True | | string | -| | Shared fields from the [Selector](#Selector). 
| True | | | +| name | description | required | default | type | +| ---- | ----------------------------------------- | -------- | ------- | ------ | +| name | Name of the attribute (will be slugified) | True | | string | +| | See [Selector](#Selector) fields | True | | | ### Form-submit @@ -152,38 +152,35 @@ Configure the form-submit functionality which enables you to submit a (login) fo ### Form Variables -Configure the variables you want to be forwarded from scraping the [Form-submit](#form-submit) page to scraping the main page for sensor data. A common use case is to populate the `X-Login-Token` header which is the result of the login. +Configure the variables that will be scraped from the [`form_submit`](#form-submit) response. You will be able to use those values in the `value_template` of a header or a selector in the main configuration. A common use case is to populate the `X-Login-Token` header which is the result of the login. -| name | description | required | default | type | -| ---- | --------------------------------------------- | -------- | ------- | ------ | -| name | Name of the variable | True | | string | -| | Shared fields from the [Selector](#Selector). | True | | | +| name | description | required | default | type | +| ---- | -------------------------------- | -------- | ------- | ------ | +| name | Name of the variable | True | | string | +| | See [Selector](#Selector) fields | True | | | Example: ```yaml multiscrape: - - resource: "https://website-api.airvisual.com/v1/users/65a28a0cec1ff309a74ba414/devices/avo_65a2bd77c2f7aeabcd715393?units.system=metric&AQI=US&language=en" + - resource: "https://somesiteyouwanttoscrape.com" form_submit: submit_once: True - resource: "https://website-api.airvisual.com/v1/auth/signin/by/email" + resource: "https://authforsomesiteyouwanttoscrape.com" input: email: "" password: "" variables: - name: token - value_template: "{{ (value | from_json).loginToken }}" + value_template: "{{ ... 
}}" headers: X-Login-Token: "{{ token }}" - sensor: - - name: AirVisual Outdoor AQI - value_template: "{{ (value | from_json).current.aqi.value }}" - unit_of_measurement: "AQI US" + sensor: ... ``` ### Selector -Shared field used in multiple configs above. Used to define the scraping: how to extract a value from the page. +Used to configure scraping options. | name | description | required | default | type | | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | From 274a40a93e2b4a463faaef001663d459eba41850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sat, 8 Jun 2024 15:02:42 +0000 Subject: [PATCH 12/32] bumping pytest-homeassistant-custom-component version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f6ff639..337c941 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ colorlog==6.8.2 -pytest-homeassistant-custom-component==0.13.111 +pytest-homeassistant-custom-component==0.13.133 pip>=24,<25 ruff==0.3.5 From 22ee3dee1d7a615a60db8b756cafcdaf9c003d64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sat, 8 Jun 2024 15:04:56 +0000 Subject: [PATCH 13/32] Merging value and variables --- custom_components/multiscrape/http.py | 6 +++--- custom_components/multiscrape/util.py | 17 ++++++++--------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index facdccd..e819f2d 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -86,10 +86,10 @@ def set_variables(self, variables): async def async_request(self, context, resource, method=None, request_data=None): """Execute a HTTP request.""" - data = request_data or self._data_renderer(None, False, 
self._variables) + data = request_data or self._data_renderer(self._variables) method = method or self._method or "GET" - headers = self._headers_renderer(None, False, self._variables) - params = self._params_renderer(None, False, self._variables) + headers = self._headers_renderer(self._variables) + params = self._params_renderer(self._variables) _LOGGER.debug( "%s # Executing %s-request with a %s to url: %s with headers: %s.", diff --git a/custom_components/multiscrape/util.py b/custom_components/multiscrape/util.py index 81ed200..b409431 100644 --- a/custom_components/multiscrape/util.py +++ b/custom_components/multiscrape/util.py @@ -1,6 +1,5 @@ """Some utility functions.""" import logging -from typing import Any from homeassistant.exceptions import TemplateError from homeassistant.helpers.template import Template @@ -11,21 +10,21 @@ def create_renderer(hass, value_template): """Create a template renderer based on value_template.""" if value_template is None: - return lambda value=None, parse_result=None, variables={}: value + return lambda value=None, parse_result=None: value if not isinstance(value_template, Template): value_template = Template(value_template, hass) else: value_template.hass = hass - def _render(value: Any = None, parse_result=False, variables: dict = {}): + def _render(variables: dict = {}, parse_result=False): try: - return value_template.async_render({"value": value} | variables, parse_result) + return value_template.async_render(variables, parse_result) except TemplateError: _LOGGER.exception( - "Error rendering template: %s with value %s", value_template, value + "Error rendering template: %s with variables %s", value_template, variables ) - return value + return None return _render @@ -33,16 +32,16 @@ def _render(value: Any = None, parse_result=False, variables: dict = {}): def create_dict_renderer(hass, templates_dict): """Create template renderers for a dictionary with value_templates.""" if templates_dict is None: - return lambda 
value=None, parse_result=None, variables={}: {} + return lambda value=None, parse_result=None: {} # Create a copy of the templates_dict to avoid modification of the original templates_dict = templates_dict.copy() for item in templates_dict: templates_dict[item] = create_renderer(hass, templates_dict[item]) - def _render(value: Any = None, parse_result=False, variables: dict = {}): + def _render(variables: dict = {}, parse_result=False): return { - item: templates_dict[item](value, parse_result, variables) for item in templates_dict + item: templates_dict[item](variables, parse_result) for item in templates_dict } return _render From 1f55b35141c7143a6e6c645290b8a268f1a55309 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sat, 8 Jun 2024 19:58:24 +0000 Subject: [PATCH 14/32] storing form variables in coordinator instead of http. --- custom_components/multiscrape/coordinator.py | 8 ++++---- custom_components/multiscrape/http.py | 16 ++++++---------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 34ad895..d8a278c 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -54,6 +54,7 @@ def __init__( self._form_submitter = form self._resource_renderer = resource_renderer self._scraper = scraper + self._form_variables = {} def notify_scrape_exception(self): """Notify the form_submitter of an exception so it will re-submit next trigger.""" @@ -67,9 +68,8 @@ async def get_content(self) -> str: if self._form_submitter: try: result = await self._form_submitter.async_submit(resource) - form_variables = self._form_submitter.scrape_variables() - self._http.set_variables(form_variables) - self._scraper.set_variables(form_variables) + self._form_variables = self._form_submitter.scrape_variables() + self._scraper.set_variables(self._form_variables) if result: _LOGGER.debug( @@ -84,7 +84,7 @@ 
async def get_content(self) -> str: ex, ) - response = await self._http.async_request("page", resource) + response = await self._http.async_request("page", resource, variables=self._form_variables) return response.text diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index e819f2d..6ed772c 100644 --- a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -70,7 +70,6 @@ def __init__( self._params_renderer = params_renderer self._headers_renderer = headers_renderer self._data_renderer = data_renderer - self._variables = None def set_authentication(self, username, password, auth_type): """Set http authentication.""" @@ -78,18 +77,15 @@ def set_authentication(self, username, password, auth_type): self._auth = httpx.DigestAuth(username, password) else: self._auth = (username, password) - _LOGGER.debug("%s # Authentication configuration processed", self._config_name) - - def set_variables(self, variables): - """Set variables.""" - self._variables = variables + _LOGGER.debug( + "%s # Authentication configuration processed", self._config_name) - async def async_request(self, context, resource, method=None, request_data=None): + async def async_request(self, context, resource, method=None, request_data=None, variables: dict = {}): """Execute a HTTP request.""" - data = request_data or self._data_renderer(self._variables) + data = request_data or self._data_renderer(variables) method = method or self._method or "GET" - headers = self._headers_renderer(self._variables) - params = self._params_renderer(self._variables) + headers = self._headers_renderer(variables) + params = self._params_renderer(variables) _LOGGER.debug( "%s # Executing %s-request with a %s to url: %s with headers: %s.", From 8766124c9af2de6ef95c0d4dcb7741ef3a904a17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sat, 8 Jun 2024 20:36:39 +0000 Subject: [PATCH 15/32] Reusing _form_variables stored in request manager 
(which is a part of coordinator) for hass entities/sensors. --- .../multiscrape/binary_sensor.py | 30 ++++----- custom_components/multiscrape/coordinator.py | 1 - custom_components/multiscrape/entity.py | 12 ++-- custom_components/multiscrape/scraper.py | 17 +++-- custom_components/multiscrape/sensor.py | 32 ++++------ custom_components/multiscrape/service.py | 64 ++++++++----------- 6 files changed, 67 insertions(+), 89 deletions(-) diff --git a/custom_components/multiscrape/binary_sensor.py b/custom_components/multiscrape/binary_sensor.py index 4e23016..2b986e6 100644 --- a/custom_components/multiscrape/binary_sensor.py +++ b/custom_components/multiscrape/binary_sensor.py @@ -4,27 +4,20 @@ import logging from homeassistant.components.binary_sensor import BinarySensorEntity -from homeassistant.const import CONF_DEVICE_CLASS -from homeassistant.const import CONF_FORCE_UPDATE -from homeassistant.const import CONF_ICON -from homeassistant.const import CONF_NAME -from homeassistant.const import CONF_UNIQUE_ID -from homeassistant.const import Platform +from homeassistant.const import (CONF_DEVICE_CLASS, CONF_FORCE_UPDATE, + CONF_ICON, CONF_NAME, CONF_UNIQUE_ID, + Platform) from homeassistant.core import HomeAssistant from homeassistant.exceptions import PlatformNotReady from homeassistant.helpers.entity import async_generate_entity_id from homeassistant.helpers.entity_platform import AddEntitiesCallback -from homeassistant.helpers.typing import ConfigType -from homeassistant.helpers.typing import DiscoveryInfoType +from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType from homeassistant.util import slugify from . 
import async_get_config_and_coordinator -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import CONF_PICTURE -from .const import CONF_SENSOR_ATTRS -from .const import LOG_LEVELS +from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, + CONF_ON_ERROR_VALUE_NONE, CONF_PICTURE, CONF_SENSOR_ATTRS, + LOG_LEVELS) from .entity import MultiscrapeEntity from .selector import Selector @@ -52,7 +45,8 @@ async def async_setup_platform( raise PlatformNotReady sensor_name = conf.get(CONF_NAME) - _LOGGER.debug("%s # %s # Setting up binary sensor", scraper.name, sensor_name) + _LOGGER.debug("%s # %s # Setting up binary sensor", + scraper.name, sensor_name) unique_id = conf.get(CONF_UNIQUE_ID) device_class = conf.get(CONF_DEVICE_CLASS) force_update = conf.get(CONF_FORCE_UPDATE) @@ -129,9 +123,11 @@ def _update_sensor(self): try: if self.coordinator.update_error is True: - raise ValueError("Skipped scraping because data couldn't be updated") + raise ValueError( + "Skipped scraping because data couldn't be updated") - value = self.scraper.scrape(self._sensor_selector, self._name) + value = self.scraper.scrape( + self._sensor_selector, self._name, variables=self.coordinator._request_manager._form_variables) try: self._attr_is_on = bool(int(value)) except ValueError: diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index d8a278c..6381fd1 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -69,7 +69,6 @@ async def get_content(self) -> str: try: result = await self._form_submitter.async_submit(resource) self._form_variables = self._form_submitter.scrape_variables() - self._scraper.set_variables(self._form_variables) if result: _LOGGER.debug( diff --git a/custom_components/multiscrape/entity.py b/custom_components/multiscrape/entity.py index 
e7624ba..b988827 100644 --- a/custom_components/multiscrape/entity.py +++ b/custom_components/multiscrape/entity.py @@ -8,10 +8,8 @@ from homeassistant.helpers.entity import Entity from homeassistant.helpers.update_coordinator import DataUpdateCoordinator -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import LOG_LEVELS +from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, + CONF_ON_ERROR_VALUE_NONE, LOG_LEVELS) from .scraper import Scraper _LOGGER = logging.getLogger(__name__) @@ -92,7 +90,8 @@ async def async_added_to_hass(self) -> None: ) if self.coordinator: self.async_on_remove( - self.coordinator.async_add_listener(self._handle_coordinator_update) + self.coordinator.async_add_listener( + self._handle_coordinator_update) ) @callback @@ -133,7 +132,8 @@ def _update_attributes(self): ) for name, attr_selector in self._attribute_selectors.items(): try: - attr_value = self.scraper.scrape(attr_selector, self._name, name) + attr_value = self.scraper.scrape( + attr_selector, self._name, name, variables=self.coordinator._request_manager._form_variables) self._attr_extra_state_attributes[name] = attr_value except Exception as exception: _LOGGER.debug( diff --git a/custom_components/multiscrape/scraper.py b/custom_components/multiscrape/scraper.py index 5b9b4b1..575f651 100644 --- a/custom_components/multiscrape/scraper.py +++ b/custom_components/multiscrape/scraper.py @@ -96,11 +96,7 @@ async def set_content(self, content): ) raise - def set_variables(self, variables): - """Set variables.""" - self._variables = variables - - def scrape(self, selector, sensor, attribute=None): + def scrape(self, selector, sensor, attribute=None, variables: dict = {}): """Scrape based on given selector the data.""" # This is required as this function is called separately for sensors and attributes log_prefix = f"{self._config_name} # {sensor}" @@ -110,7 +106,7 @@ 
def scrape(self, selector, sensor, attribute=None): if selector.just_value: _LOGGER.debug("%s # Applying value_template only.", log_prefix) result = selector.value_template.async_render_with_possible_json_value( - self._data, None, variables=self._variables + self._data, None, variables=variables ) return selector.value_template._parse_result(result) @@ -121,7 +117,8 @@ def scrape(self, selector, sensor, attribute=None): if selector.is_list: tags = self._soup.select(selector.list) - _LOGGER.debug("%s # List selector selected tags: %s", log_prefix, tags) + _LOGGER.debug("%s # List selector selected tags: %s", + log_prefix, tags) if selector.attribute is not None: _LOGGER.debug( "%s # Try to find attributes: %s", @@ -153,13 +150,15 @@ def scrape(self, selector, sensor, attribute=None): _LOGGER.debug("%s # Selector result: %s", log_prefix, value) if value is not None and selector.value_template is not None: - _LOGGER.debug("%s # Applying value_template on selector result", log_prefix) + _LOGGER.debug( + "%s # Applying value_template on selector result", log_prefix) value = selector.value_template.async_render( variables={"value": value} | self._variables, parse_result=True ) _LOGGER.debug( - "%s # Final selector value: %s of type %s", log_prefix, value, type(value) + "%s # Final selector value: %s of type %s", log_prefix, value, type( + value) ) return value diff --git a/custom_components/multiscrape/sensor.py b/custom_components/multiscrape/sensor.py index dfe5ac1..277d0c7 100644 --- a/custom_components/multiscrape/sensor.py +++ b/custom_components/multiscrape/sensor.py @@ -3,32 +3,22 @@ import logging -from homeassistant.components.sensor import SensorDeviceClass -from homeassistant.components.sensor import SensorEntity +from homeassistant.components.sensor import SensorDeviceClass, SensorEntity from homeassistant.components.sensor.helpers import async_parse_date_datetime -from homeassistant.const import CONF_DEVICE_CLASS -from homeassistant.const import 
CONF_FORCE_UPDATE -from homeassistant.const import CONF_ICON -from homeassistant.const import CONF_NAME -from homeassistant.const import CONF_UNIQUE_ID -from homeassistant.const import CONF_UNIT_OF_MEASUREMENT -from homeassistant.const import Platform +from homeassistant.const import (CONF_DEVICE_CLASS, CONF_FORCE_UPDATE, + CONF_ICON, CONF_NAME, CONF_UNIQUE_ID, + CONF_UNIT_OF_MEASUREMENT, Platform) from homeassistant.core import HomeAssistant from homeassistant.exceptions import PlatformNotReady from homeassistant.helpers.entity import async_generate_entity_id from homeassistant.helpers.entity_platform import AddEntitiesCallback -from homeassistant.helpers.typing import ConfigType -from homeassistant.helpers.typing import DiscoveryInfoType +from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType from homeassistant.util import slugify from . import async_get_config_and_coordinator -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import CONF_PICTURE -from .const import CONF_SENSOR_ATTRS -from .const import CONF_STATE_CLASS -from .const import LOG_LEVELS +from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, + CONF_ON_ERROR_VALUE_NONE, CONF_PICTURE, CONF_SENSOR_ATTRS, + CONF_STATE_CLASS, LOG_LEVELS) from .entity import MultiscrapeEntity from .selector import Selector @@ -142,9 +132,11 @@ def _update_sensor(self): try: if self.coordinator.update_error is True: - raise ValueError("Skipped scraping because data couldn't be updated") + raise ValueError( + "Skipped scraping because data couldn't be updated") - value = self.scraper.scrape(self._sensor_selector, self._name) + value = self.scraper.scrape( + self._sensor_selector, self._name, variables=self.coordinator._request_manager._form_variables) _LOGGER.debug( "%s # %s # Selected: %s", self.scraper.name, self._name, value ) diff --git a/custom_components/multiscrape/service.py 
b/custom_components/multiscrape/service.py index b1f0619..3bff0c7 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -1,40 +1,24 @@ """Class for implementing the multiscrape services.""" import logging -import voluptuous as vol + import homeassistant.helpers.config_validation as cv -from homeassistant.core import HomeAssistant -from homeassistant.core import ServiceCall, SupportsResponse -from homeassistant.const import ( - CONF_NAME, - CONF_DESCRIPTION, - CONF_UNIQUE_ID, - CONF_VALUE_TEMPLATE, - CONF_ICON, -) +import voluptuous as vol +from homeassistant.const import (CONF_DESCRIPTION, CONF_ICON, CONF_NAME, + CONF_UNIQUE_ID, CONF_VALUE_TEMPLATE, Platform) +from homeassistant.core import HomeAssistant, ServiceCall, SupportsResponse from homeassistant.helpers.service import async_set_service_schema from homeassistant.util import slugify -from homeassistant.const import Platform - -from .scraper import create_scraper +from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_PARSER, + CONF_SENSOR_ATTRS, DOMAIN) +from .coordinator import (MultiscrapeDataUpdateCoordinator, + create_content_request_manager) from .form import create_form_submitter - -from .selector import Selector -from .schema import SERVICE_COMBINED_SCHEMA -from .coordinator import ( - MultiscrapeDataUpdateCoordinator, - create_content_request_manager, -) from .http import create_http_wrapper - -from .const import ( - CONF_FORM_SUBMIT, - CONF_PARSER, - CONF_SENSOR_ATTRS, - DOMAIN, - CONF_FIELDS, -) +from .schema import SERVICE_COMBINED_SCHEMA +from .scraper import create_scraper +from .selector import Selector _LOGGER = logging.getLogger(__name__) @@ -59,7 +43,8 @@ async def setup_config_services( async def _setup_trigger_service(hass: HomeAssistant, target_name, coordinator): async def _async_trigger_service(service: ServiceCall): - _LOGGER.info("Multiscrape triggered by service: %s", service.__repr__()) + _LOGGER.info("Multiscrape triggered by 
service: %s", + service.__repr__()) await coordinator.async_request_refresh() hass.services.async_register( @@ -75,7 +60,8 @@ async def _async_trigger_service(service: ServiceCall): CONF_DESCRIPTION: f"Triggers an update for the multiscrape {target_name} integration, independent of the update interval.", CONF_FIELDS: {}, } - async_set_service_schema(hass, DOMAIN, f"trigger_{target_name}", service_desc) + async_set_service_schema( + hass, DOMAIN, f"trigger_{target_name}", service_desc) async def setup_get_content_service(hass: HomeAssistant): @@ -118,9 +104,11 @@ async def _async_scrape_service(service: ServiceCall) -> None: for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: for sensor in conf.get(platform) or []: - name = sensor.get(CONF_UNIQUE_ID) or slugify(sensor.get(CONF_NAME)) + name = sensor.get(CONF_UNIQUE_ID) or slugify( + sensor.get(CONF_NAME)) sensor_selector = Selector(hass, sensor) - response[name] = {"value": scraper.scrape(sensor_selector, config_name)} + response[name] = {"value": scraper.scrape( + sensor_selector, config_name, variables=sensor.coordinator._request_manager._form_variables)} if sensor.get(CONF_ICON): response[CONF_ICON] = sensor.get(CONF_ICON).async_render( @@ -131,7 +119,8 @@ async def _async_scrape_service(service: ServiceCall) -> None: attr_name = slugify(attr_conf[CONF_NAME]) attr_selector = Selector(hass, attr_conf) response[name].setdefault(CONF_SENSOR_ATTRS, {}).update( - {attr_name: scraper.scrape(attr_selector, config_name)} + {attr_name: scraper.scrape( + attr_selector, config_name, variables=sensor.coordinator._request_manager._form_variables)} ) return response @@ -151,7 +140,8 @@ async def _prepare_service_request(hass: HomeAssistant, conf, config_name): form_submit_config = conf.get(CONF_FORM_SUBMIT) parser = conf.get(CONF_PARSER) if form_submit_config: - form_http = create_http_wrapper(config_name, form_submit_config, hass, None) + form_http = create_http_wrapper( + config_name, form_submit_config, hass, None) 
form_submitter = create_form_submitter( config_name, form_submit_config, hass, form_http, None, parser ) @@ -169,7 +159,8 @@ def _restore_templates(config): for attr_conf in sensor.get(CONF_SENSOR_ATTRS) or []: attr_conf[CONF_VALUE_TEMPLATE] = ( cv.template( - _replace_template_characters(attr_conf.get(CONF_VALUE_TEMPLATE)) + _replace_template_characters( + attr_conf.get(CONF_VALUE_TEMPLATE)) ) if attr_conf.get(CONF_VALUE_TEMPLATE) else None @@ -180,7 +171,8 @@ def _restore_templates(config): ) if sensor.get(CONF_VALUE_TEMPLATE): sensor[CONF_VALUE_TEMPLATE] = cv.template( - _replace_template_characters(sensor.get(CONF_VALUE_TEMPLATE)) + _replace_template_characters( + sensor.get(CONF_VALUE_TEMPLATE)) ) return config From 9455f09f48a5d4fbb2f5f4bb1ba7cdc5b3e43326 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sun, 9 Jun 2024 16:41:13 +0000 Subject: [PATCH 16/32] removing unused _variables in Scraper --- custom_components/multiscrape/scraper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/custom_components/multiscrape/scraper.py b/custom_components/multiscrape/scraper.py index 575f651..b6299ef 100644 --- a/custom_components/multiscrape/scraper.py +++ b/custom_components/multiscrape/scraper.py @@ -45,7 +45,6 @@ def __init__( self._soup: BeautifulSoup = None self._data = None self._separator = separator - self._variables = {} self.reset() @property @@ -152,8 +151,7 @@ def scrape(self, selector, sensor, attribute=None, variables: dict = {}): if value is not None and selector.value_template is not None: _LOGGER.debug( "%s # Applying value_template on selector result", log_prefix) - value = selector.value_template.async_render( - variables={"value": value} | self._variables, parse_result=True + value = selector.value_template.async_render(variables=variables, parse_result=True ) _LOGGER.debug( From d990f71d00692c6a812a8e1c78533f857d29d3ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sun, 9 Jun 2024 
16:43:14 +0000 Subject: [PATCH 17/32] Removing unused _scraper in content request manager --- custom_components/multiscrape/__init__.py | 2 +- custom_components/multiscrape/coordinator.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 82f32c1..31b796a 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -124,7 +124,7 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: http = create_http_wrapper(config_name, conf, hass, file_manager) scraper = create_scraper(config_name, conf, hass, file_manager) - request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter, scraper) + request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter) coordinator = create_multiscrape_coordinator( config_name, conf, diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 6381fd1..0d3d5c1 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -23,7 +23,7 @@ def create_content_request_manager( - config_name, config, hass: HomeAssistant, http, form_submitter, scraper + config_name, config, hass: HomeAssistant, http, form_submitter ): """Create a content request manager instance.""" _LOGGER.debug("%s # Creating ContentRequestManager", config_name) @@ -34,7 +34,7 @@ def create_content_request_manager( resource_renderer = create_renderer(hass, resource_template) else: resource_renderer = create_renderer(hass, resource) - return ContentRequestManager(config_name, http, resource_renderer, form_submitter, scraper) + return ContentRequestManager(config_name, http, resource_renderer, form_submitter) class ContentRequestManager: @@ -46,14 +46,12 @@ def __init__( http: HttpWrapper, resource_renderer: Callable, form: FormSubmitter = 
None, - scraper: Scraper = None, ) -> None: """Initialize ContentRequestManager.""" self._config_name = config_name self._http = http self._form_submitter = form self._resource_renderer = resource_renderer - self._scraper = scraper self._form_variables = {} def notify_scrape_exception(self): From e0e02e27dd63e5cf31c25f56288437b5a0a7222a Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Wed, 12 Jun 2024 21:25:59 +0000 Subject: [PATCH 18/32] Adding a getter for form variables in coordinator --- custom_components/multiscrape/binary_sensor.py | 2 +- custom_components/multiscrape/coordinator.py | 4 ++++ custom_components/multiscrape/entity.py | 2 +- custom_components/multiscrape/sensor.py | 2 +- custom_components/multiscrape/service.py | 4 ++-- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/custom_components/multiscrape/binary_sensor.py b/custom_components/multiscrape/binary_sensor.py index 2b986e6..77dd83c 100644 --- a/custom_components/multiscrape/binary_sensor.py +++ b/custom_components/multiscrape/binary_sensor.py @@ -127,7 +127,7 @@ def _update_sensor(self): "Skipped scraping because data couldn't be updated") value = self.scraper.scrape( - self._sensor_selector, self._name, variables=self.coordinator._request_manager._form_variables) + self._sensor_selector, self._name, variables=self.coordinator.get_form_variables()) try: self._attr_is_on = bool(int(value)) except ValueError: diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 0d3d5c1..4c35454 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -201,3 +201,7 @@ async def _prepare_new_run(self): ) self._scraper.reset() + + def get_form_variables(self): + """Return the form variables.""" + return self._request_manager._form_variables diff --git a/custom_components/multiscrape/entity.py b/custom_components/multiscrape/entity.py index b988827..044e360 100644 --- 
a/custom_components/multiscrape/entity.py +++ b/custom_components/multiscrape/entity.py @@ -133,7 +133,7 @@ def _update_attributes(self): for name, attr_selector in self._attribute_selectors.items(): try: attr_value = self.scraper.scrape( - attr_selector, self._name, name, variables=self.coordinator._request_manager._form_variables) + attr_selector, self._name, name, variables=self.coordinator.get_form_variables()) self._attr_extra_state_attributes[name] = attr_value except Exception as exception: _LOGGER.debug( diff --git a/custom_components/multiscrape/sensor.py b/custom_components/multiscrape/sensor.py index 277d0c7..6601cc6 100644 --- a/custom_components/multiscrape/sensor.py +++ b/custom_components/multiscrape/sensor.py @@ -136,7 +136,7 @@ def _update_sensor(self): "Skipped scraping because data couldn't be updated") value = self.scraper.scrape( - self._sensor_selector, self._name, variables=self.coordinator._request_manager._form_variables) + self._sensor_selector, self._name, variables=self.coordinator.get_form_variables()) _LOGGER.debug( "%s # %s # Selected: %s", self.scraper.name, self._name, value ) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index 3bff0c7..be03b14 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -108,7 +108,7 @@ async def _async_scrape_service(service: ServiceCall) -> None: sensor.get(CONF_NAME)) sensor_selector = Selector(hass, sensor) response[name] = {"value": scraper.scrape( - sensor_selector, config_name, variables=sensor.coordinator._request_manager._form_variables)} + sensor_selector, config_name, variables=sensor.coordinator.get_form_variables())} if sensor.get(CONF_ICON): response[CONF_ICON] = sensor.get(CONF_ICON).async_render( @@ -120,7 +120,7 @@ async def _async_scrape_service(service: ServiceCall) -> None: attr_selector = Selector(hass, attr_conf) response[name].setdefault(CONF_SENSOR_ATTRS, {}).update( {attr_name: 
scraper.scrape( - attr_selector, config_name, variables=sensor.coordinator._request_manager._form_variables)} + attr_selector, config_name, variables=sensor.coordinator.get_form_variables())} ) return response From bab22aee8341fb0ad8bec82dd021ef7e353e8888 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Wed, 12 Jun 2024 22:05:12 +0000 Subject: [PATCH 19/32] Returning None by default if value_template is None --- custom_components/multiscrape/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/custom_components/multiscrape/util.py b/custom_components/multiscrape/util.py index b409431..ae994db 100644 --- a/custom_components/multiscrape/util.py +++ b/custom_components/multiscrape/util.py @@ -10,7 +10,7 @@ def create_renderer(hass, value_template): """Create a template renderer based on value_template.""" if value_template is None: - return lambda value=None, parse_result=None: value + return lambda variables={}, parse_result=None: None if not isinstance(value_template, Template): value_template = Template(value_template, hass) @@ -32,7 +32,7 @@ def _render(variables: dict = {}, parse_result=False): def create_dict_renderer(hass, templates_dict): """Create template renderers for a dictionary with value_templates.""" if templates_dict is None: - return lambda value=None, parse_result=None: {} + return lambda variables={}, parse_result=None: {} # Create a copy of the templates_dict to avoid modification of the original templates_dict = templates_dict.copy() From c7658946e5e612e3e667b5b894887000490344b4 Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Fri, 14 Jun 2024 22:15:18 +0000 Subject: [PATCH 20/32] get_form_variables() -> form_variables property --- custom_components/multiscrape/binary_sensor.py | 2 +- custom_components/multiscrape/coordinator.py | 10 ++++++++-- custom_components/multiscrape/entity.py | 2 +- custom_components/multiscrape/sensor.py | 2 +- custom_components/multiscrape/service.py | 4 ++-- 5 files changed, 13 
insertions(+), 7 deletions(-) diff --git a/custom_components/multiscrape/binary_sensor.py b/custom_components/multiscrape/binary_sensor.py index 77dd83c..1e414a3 100644 --- a/custom_components/multiscrape/binary_sensor.py +++ b/custom_components/multiscrape/binary_sensor.py @@ -127,7 +127,7 @@ def _update_sensor(self): "Skipped scraping because data couldn't be updated") value = self.scraper.scrape( - self._sensor_selector, self._name, variables=self.coordinator.get_form_variables()) + self._sensor_selector, self._name, variables=self.coordinator.form_variables) try: self._attr_is_on = bool(int(value)) except ValueError: diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 4c35454..4e4138a 100644 --- a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -84,6 +84,11 @@ async def get_content(self) -> str: response = await self._http.async_request("page", resource, variables=self._form_variables) return response.text + @property + def form_variables(self): + """Return the form variables.""" + return self._form_variables + def create_multiscrape_coordinator( config_name, conf, hass, request_manager, file_manager, scraper @@ -202,6 +207,7 @@ async def _prepare_new_run(self): self._scraper.reset() - def get_form_variables(self): + @property + def form_variables(self): """Return the form variables.""" - return self._request_manager._form_variables + return self._request_manager.form_variables diff --git a/custom_components/multiscrape/entity.py b/custom_components/multiscrape/entity.py index 044e360..0b5f4d6 100644 --- a/custom_components/multiscrape/entity.py +++ b/custom_components/multiscrape/entity.py @@ -133,7 +133,7 @@ def _update_attributes(self): for name, attr_selector in self._attribute_selectors.items(): try: attr_value = self.scraper.scrape( - attr_selector, self._name, name, variables=self.coordinator.get_form_variables()) + attr_selector, self._name, 
name, variables=self.coordinator.form_variables) self._attr_extra_state_attributes[name] = attr_value except Exception as exception: _LOGGER.debug( diff --git a/custom_components/multiscrape/sensor.py b/custom_components/multiscrape/sensor.py index 6601cc6..3a58352 100644 --- a/custom_components/multiscrape/sensor.py +++ b/custom_components/multiscrape/sensor.py @@ -136,7 +136,7 @@ def _update_sensor(self): "Skipped scraping because data couldn't be updated") value = self.scraper.scrape( - self._sensor_selector, self._name, variables=self.coordinator.get_form_variables()) + self._sensor_selector, self._name, variables=self.coordinator.form_variables) _LOGGER.debug( "%s # %s # Selected: %s", self.scraper.name, self._name, value ) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index be03b14..6096f82 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -108,7 +108,7 @@ async def _async_scrape_service(service: ServiceCall) -> None: sensor.get(CONF_NAME)) sensor_selector = Selector(hass, sensor) response[name] = {"value": scraper.scrape( - sensor_selector, config_name, variables=sensor.coordinator.get_form_variables())} + sensor_selector, config_name, variables=sensor.coordinator.form_variables)} if sensor.get(CONF_ICON): response[CONF_ICON] = sensor.get(CONF_ICON).async_render( @@ -120,7 +120,7 @@ async def _async_scrape_service(service: ServiceCall) -> None: attr_selector = Selector(hass, attr_conf) response[name].setdefault(CONF_SENSOR_ATTRS, {}).update( {attr_name: scraper.scrape( - attr_selector, config_name, variables=sensor.coordinator.get_form_variables())} + attr_selector, config_name, variables=sensor.coordinator.form_variables)} ) return response From 7edada198ac0ae1f98cf858b281ea8399779f1ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Fri, 5 Jul 2024 10:13:27 +0000 Subject: [PATCH 21/32] Making changes in service.py safe (not cause 
errors if coordinator is uninitialized. --- custom_components/multiscrape/service.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index 6096f82..7091eed 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -107,8 +107,9 @@ async def _async_scrape_service(service: ServiceCall) -> None: name = sensor.get(CONF_UNIQUE_ID) or slugify( sensor.get(CONF_NAME)) sensor_selector = Selector(hass, sensor) + variables = sensor.coordinator.form_variables if hasattr(sensor, 'coordinator') else {} response[name] = {"value": scraper.scrape( - sensor_selector, config_name, variables=sensor.coordinator.form_variables)} + sensor_selector, config_name, variables=variables)} if sensor.get(CONF_ICON): response[CONF_ICON] = sensor.get(CONF_ICON).async_render( @@ -120,7 +121,7 @@ async def _async_scrape_service(service: ServiceCall) -> None: attr_selector = Selector(hass, attr_conf) response[name].setdefault(CONF_SENSOR_ATTRS, {}).update( {attr_name: scraper.scrape( - attr_selector, config_name, variables=sensor.coordinator.form_variables)} + attr_selector, config_name, variables=variables)} ) return response From c04c74cc3f4392abb46b878a64691955ae97973a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Fri, 5 Jul 2024 10:19:42 +0000 Subject: [PATCH 22/32] raising on error in _render --- custom_components/multiscrape/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom_components/multiscrape/util.py b/custom_components/multiscrape/util.py index ae994db..f20cd27 100644 --- a/custom_components/multiscrape/util.py +++ b/custom_components/multiscrape/util.py @@ -24,7 +24,7 @@ def _render(variables: dict = {}, parse_result=False): _LOGGER.exception( "Error rendering template: %s with variables %s", value_template, variables ) - return None + raise return _render From 
6e2872750dd0c180c3c15bfae48338cc66d5a296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Fri, 5 Jul 2024 11:07:22 +0000 Subject: [PATCH 23/32] Moving create_scraper into create_form_submitter --- custom_components/multiscrape/__init__.py | 2 -- custom_components/multiscrape/form.py | 14 +++++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 31b796a..6f70c2f 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -111,13 +111,11 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: if form_submit_config: parser = conf.get(CONF_PARSER) form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) - form_scraper = create_scraper(config_name, conf, hass, file_manager) form_submitter = create_form_submitter( config_name, form_submit_config, hass, form_http, - form_scraper, file_manager, parser, ) diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index daf2443..7982908 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -6,6 +6,8 @@ from homeassistant.const import CONF_NAME, CONF_RESOURCE from homeassistant.core import HomeAssistant +from custom_components.multiscrape.scraper import create_scraper + from .const import (CONF_FORM_INPUT, CONF_FORM_INPUT_FILTER, CONF_FORM_RESUBMIT_ERROR, CONF_FORM_SELECT, CONF_FORM_SUBMIT_ONCE, CONF_FORM_VARIABLES) @@ -16,7 +18,7 @@ _LOGGER = logging.getLogger(__name__) -def create_form_submitter(config_name, config, hass, http, scraper, file_manager, parser): +def create_form_submitter(config_name, config, hass, http, file_manager, parser): """Create a form submitter instance.""" resource = config.get(CONF_RESOURCE) select = config.get(CONF_FORM_SELECT) @@ -24,9 +26,15 @@ def create_form_submitter(config_name, config, hass, http, 
scraper, file_manager input_filter = config.get(CONF_FORM_INPUT_FILTER) resubmit_error = config.get(CONF_FORM_RESUBMIT_ERROR) submit_once = config.get(CONF_FORM_SUBMIT_ONCE) + + scraper = None variables_selectors = {} - for variables_conf in config.get(CONF_FORM_VARIABLES): - variables_selectors[variables_conf.get(CONF_NAME)] = Selector(hass, variables_conf) + variables = config.get(CONF_FORM_VARIABLES) + _LOGGER.debug("DEBUG VARIABLES: %s", variables) + if (variables != []): + scraper = create_scraper(config_name, config, hass, file_manager) + for variables_conf in variables: + variables_selectors[variables_conf.get(CONF_NAME)] = Selector(hass, variables_conf) return FormSubmitter( config_name, From 2da36f31367efb65738b108a85007c115708f073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Fri, 5 Jul 2024 15:05:41 +0000 Subject: [PATCH 24/32] scraper is not always initialized in the form submit. --- custom_components/multiscrape/form.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index 7982908..6230e29 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -162,7 +162,8 @@ async def async_submit(self, main_resource): if self._submit_once: self._should_submit = False - await self._scraper.set_content(response.text) + if self._scraper: + await self._scraper.set_content(response.text) if not self._form_resource: return response.text From 3ead4847c1b4a339f55459ad1392ddd6994a2a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Sat, 6 Jul 2024 12:04:34 +0000 Subject: [PATCH 25/32] README: variables can be used in sensor and attribute configuration --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca71567..6ec9709 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ Configure the form-submit functionality which enables you to 
submit a (login) fo ### Form Variables -Configure the variables that will be scraped from the [`form_submit`](#form-submit) response. You will be able to use those values in the `value_template` of a header or a selector in the main configuration. A common use case is to populate the `X-Login-Token` header which is the result of the login. +Configure the variables that will be scraped from the [`form_submit`](#form-submit) response. These variables can be used in the `value_template` of a header, a sensor or an attribute of the main configuration of the current integration. A common use case is to populate the `X-Login-Token` header which is the result of the login. | name | description | required | default | type | | ---- | -------------------------------- | -------- | ------- | ------ | From 315ede3d0955b52803b83c90ace07aef1fa42a0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Tue, 9 Jul 2024 10:43:53 +0000 Subject: [PATCH 26/32] reworded --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c6ec0cc..602aa73 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ Configure the form-submit functionality which enables you to submit a (login) fo ### Form Variables -Configure the variables that will be scraped from the [`form_submit`](#form-submit) response. These variables can be used in the `value_template` of a header, a sensor or an attribute of the main configuration of the current integration. A common use case is to populate the `X-Login-Token` header which is the result of the login. +Configure the variables that will be scraped from the [`form_submit`](#form-submit) response. These variables can be used in the `value_template` of the main configuration of the current integration: a `selector` in sensors/attributes or in a `header`. A common use case is to populate the `X-Login-Token` header which is the result of the login. 
| name | description | required | default | type | | ---- | -------------------------------- | -------- | ------- | ------ | From c6ebd0123b59f8ec9df18840e877aad2ed955e69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Tue, 9 Jul 2024 11:33:57 +0000 Subject: [PATCH 27/32] Fixing a bug where value was overridden with introduction of variables in scraper.scrape --- custom_components/multiscrape/scraper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/custom_components/multiscrape/scraper.py b/custom_components/multiscrape/scraper.py index b6299ef..2f37aef 100644 --- a/custom_components/multiscrape/scraper.py +++ b/custom_components/multiscrape/scraper.py @@ -151,6 +151,7 @@ def scrape(self, selector, sensor, attribute=None, variables: dict = {}): if value is not None and selector.value_template is not None: _LOGGER.debug( "%s # Applying value_template on selector result", log_prefix) + variables["value"] = value value = selector.value_template.async_render(variables=variables, parse_result=True ) From f39f540bdc5ab05bb938befc6a241d40886b53c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Wed, 10 Jul 2024 11:45:34 +0000 Subject: [PATCH 28/32] fixing logs --- custom_components/multiscrape/form.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index 6230e29..a53481b 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -30,7 +30,6 @@ def create_form_submitter(config_name, config, hass, http, file_manager, parser) scraper = None variables_selectors = {} variables = config.get(CONF_FORM_VARIABLES) - _LOGGER.debug("DEBUG VARIABLES: %s", variables) if (variables != []): scraper = create_scraper(config_name, config, hass, file_manager) for variables_conf in variables: @@ -101,7 +100,7 @@ def notify_scrape_exception(self): async def async_submit(self, main_resource): """Submit the 
form.""" if not self._should_submit: - _LOGGER.debug("%s # Skip submitting form") + _LOGGER.debug("%s # Skip submitting form", self._config_name) return _LOGGER.debug("%s # Starting with form-submit", self._config_name) From 43cdfbb693ba383ef91918412bb4b3ffb68e4bac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Wed, 10 Jul 2024 11:47:39 +0000 Subject: [PATCH 29/32] Creating file manager for service to ease debugging. --- custom_components/multiscrape/__init__.py | 19 ++----------------- custom_components/multiscrape/file.py | 21 +++++++++++++++++++++ custom_components/multiscrape/service.py | 14 ++++++++------ 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 6f70c2f..52da026 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -2,7 +2,6 @@ import asyncio import contextlib import logging -import os import voluptuous as vol from homeassistant.config_entries import ConfigEntry @@ -14,14 +13,13 @@ from homeassistant.helpers import discovery from homeassistant.helpers.reload import (async_integration_yaml_config, async_reload_integration_platforms) -from homeassistant.util import slugify from .const import (CONF_FORM_SUBMIT, CONF_LOG_RESPONSE, CONF_PARSER, COORDINATOR, DOMAIN, PLATFORM_IDX, SCRAPER, SCRAPER_DATA, SCRAPER_IDX) from .coordinator import (create_content_request_manager, create_multiscrape_coordinator) -from .file import LoggingFileManager +from .file import create_file_manager from .form import create_form_submitter from .http import create_http_wrapper from .schema import COMBINED_SCHEMA, CONFIG_SCHEMA # noqa: F401 @@ -92,20 +90,7 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: "%s # Setting up multiscrape with config:\n %s", config_name, conf ) - file_manager = None - log_response = conf.get(CONF_LOG_RESPONSE) - if log_response: - folder = 
os.path.join( - hass.config.config_dir, f"multiscrape/{slugify(config_name)}/" - ) - _LOGGER.debug( - "%s # Log responses enabled, creating logging folder: %s", - config_name, - folder, - ) - file_manager = LoggingFileManager(folder) - await hass.async_add_executor_job(file_manager.create_folders) - + file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE)) form_submit_config = conf.get(CONF_FORM_SUBMIT) form_submitter = None if form_submit_config: diff --git a/custom_components/multiscrape/file.py b/custom_components/multiscrape/file.py index 95200ad..bf09037 100644 --- a/custom_components/multiscrape/file.py +++ b/custom_components/multiscrape/file.py @@ -1,6 +1,27 @@ """LoggingFileManager for file utilities.""" +import logging import os +from homeassistant.core import HomeAssistant +from homeassistant.util import slugify + +_LOGGER = logging.getLogger(__name__) + +async def create_file_manager(hass: HomeAssistant, config_name: str, log_response: bool): + """Create a file manager instance.""" + file_manager = None + if log_response: + folder = os.path.join( + hass.config.config_dir, f"multiscrape/{slugify(config_name)}/" + ) + _LOGGER.debug( + "%s # Log responses enabled, creating logging folder: %s", + config_name, + folder, + ) + file_manager = LoggingFileManager(folder) + hass.async_add_executor_job(file_manager.create_folders) + return file_manager class LoggingFileManager: """LoggingFileManager for handling logging files.""" diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index 7091eed..8f3509f 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -10,10 +10,11 @@ from homeassistant.helpers.service import async_set_service_schema from homeassistant.util import slugify -from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_PARSER, - CONF_SENSOR_ATTRS, DOMAIN) +from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_LOG_RESPONSE, 
+ CONF_PARSER, CONF_SENSOR_ATTRS, DOMAIN) from .coordinator import (MultiscrapeDataUpdateCoordinator, create_content_request_manager) +from .file import create_file_manager from .form import create_form_submitter from .http import create_http_wrapper from .schema import SERVICE_COMBINED_SCHEMA @@ -136,20 +137,21 @@ async def _async_scrape_service(service: ServiceCall) -> None: async def _prepare_service_request(hass: HomeAssistant, conf, config_name): - http = create_http_wrapper(config_name, conf, hass, None) + file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE)) + http = create_http_wrapper(config_name, conf, hass, file_manager) form_submitter = None form_submit_config = conf.get(CONF_FORM_SUBMIT) parser = conf.get(CONF_PARSER) if form_submit_config: form_http = create_http_wrapper( - config_name, form_submit_config, hass, None) + config_name, form_submit_config, hass, file_manager) form_submitter = create_form_submitter( - config_name, form_submit_config, hass, form_http, None, parser + config_name, form_submit_config, hass, form_http, file_manager, parser ) request_manager = create_content_request_manager( config_name, conf, hass, http, form_submitter ) - scraper = create_scraper(config_name, conf, hass, None) + scraper = create_scraper(config_name, conf, hass, file_manager) return request_manager, scraper From 357e5343aa3d747beb017717083b6d14c826e26c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Wed, 10 Jul 2024 11:57:03 +0000 Subject: [PATCH 30/32] Fixing templates for services, as multiscrape has custom handling of templates (adds ! within {{ and }}). This is done for newly introduced variables as well as headers, which were not handled before. 
--- custom_components/multiscrape/service.py | 72 +++++++++++++----------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index 8f3509f..dd421e9 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -4,14 +4,16 @@ import homeassistant.helpers.config_validation as cv import voluptuous as vol -from homeassistant.const import (CONF_DESCRIPTION, CONF_ICON, CONF_NAME, - CONF_UNIQUE_ID, CONF_VALUE_TEMPLATE, Platform) +from homeassistant.const import (CONF_DESCRIPTION, CONF_HEADERS, CONF_ICON, + CONF_NAME, CONF_UNIQUE_ID, + CONF_VALUE_TEMPLATE, Platform) from homeassistant.core import HomeAssistant, ServiceCall, SupportsResponse from homeassistant.helpers.service import async_set_service_schema +from homeassistant.helpers.template import Template from homeassistant.util import slugify -from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_LOG_RESPONSE, - CONF_PARSER, CONF_SENSOR_ATTRS, DOMAIN) +from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_FORM_VARIABLES, + CONF_LOG_RESPONSE, CONF_PARSER, CONF_SENSOR_ATTRS, DOMAIN) from .coordinator import (MultiscrapeDataUpdateCoordinator, create_content_request_manager) from .file import create_file_manager @@ -44,8 +46,7 @@ async def setup_config_services( async def _setup_trigger_service(hass: HomeAssistant, target_name, coordinator): async def _async_trigger_service(service: ServiceCall): - _LOGGER.info("Multiscrape triggered by service: %s", - service.__repr__()) + _LOGGER.info("Multiscrape triggered by service: %s", service.__repr__()) await coordinator.async_request_refresh() hass.services.async_register( @@ -61,8 +62,7 @@ async def _async_trigger_service(service: ServiceCall): CONF_DESCRIPTION: f"Triggers an update for the multiscrape {target_name} integration, independent of the update interval.", CONF_FIELDS: {}, } - async_set_service_schema( - hass, DOMAIN, 
f"trigger_{target_name}", service_desc) + async_set_service_schema(hass, DOMAIN, f"trigger_{target_name}", service_desc) async def setup_get_content_service(hass: HomeAssistant): @@ -105,12 +105,9 @@ async def _async_scrape_service(service: ServiceCall) -> None: for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: for sensor in conf.get(platform) or []: - name = sensor.get(CONF_UNIQUE_ID) or slugify( - sensor.get(CONF_NAME)) + name = sensor.get(CONF_UNIQUE_ID) or slugify(sensor.get(CONF_NAME)) sensor_selector = Selector(hass, sensor) - variables = sensor.coordinator.form_variables if hasattr(sensor, 'coordinator') else {} - response[name] = {"value": scraper.scrape( - sensor_selector, config_name, variables=variables)} + response[name] = {"value": scraper.scrape(sensor_selector, config_name)} if sensor.get(CONF_ICON): response[CONF_ICON] = sensor.get(CONF_ICON).async_render( @@ -121,8 +118,7 @@ async def _async_scrape_service(service: ServiceCall) -> None: attr_name = slugify(attr_conf[CONF_NAME]) attr_selector = Selector(hass, attr_conf) response[name].setdefault(CONF_SENSOR_ATTRS, {}).update( - {attr_name: scraper.scrape( - attr_selector, config_name, variables=variables)} + {attr_name: scraper.scrape(attr_selector, config_name)} ) return response @@ -157,28 +153,36 @@ async def _prepare_service_request(hass: HomeAssistant, conf, config_name): def _restore_templates(config): config = dict(config) + selectors = [] for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: - for sensor in config.get(platform) or []: - for attr_conf in sensor.get(CONF_SENSOR_ATTRS) or []: - attr_conf[CONF_VALUE_TEMPLATE] = ( - cv.template( - _replace_template_characters( - attr_conf.get(CONF_VALUE_TEMPLATE)) - ) - if attr_conf.get(CONF_VALUE_TEMPLATE) - else None - ) - if sensor.get(CONF_ICON): - sensor[CONF_ICON] = cv.template( - _replace_template_characters(sensor.get(CONF_ICON)) - ) - if sensor.get(CONF_VALUE_TEMPLATE): - sensor[CONF_VALUE_TEMPLATE] = cv.template( - 
_replace_template_characters( - sensor.get(CONF_VALUE_TEMPLATE)) - ) + selectors.extend(config.get(platform) or []) + if config.get(CONF_FORM_SUBMIT): + form_variables = config[CONF_FORM_SUBMIT].get(CONF_FORM_VARIABLES) or [] + selectors.extend(form_variables) + + for selector in selectors: + _LOGGER.info("DEBUG selector: %s", selector) + for attr_conf in selector.get(CONF_SENSOR_ATTRS) or []: + attr_conf[CONF_VALUE_TEMPLATE] = ( + _restore_template(attr_conf.get(CONF_VALUE_TEMPLATE)) + if attr_conf.get(CONF_VALUE_TEMPLATE) + else None + ) + if selector.get(CONF_ICON): + selector[CONF_ICON] = _restore_template(selector.get(CONF_ICON)) + if selector.get(CONF_VALUE_TEMPLATE): + selector[CONF_VALUE_TEMPLATE] = _restore_template(selector[CONF_VALUE_TEMPLATE]) + + headers = config.get(CONF_HEADERS) or {} + for key, value in headers.items(): + headers[key] = _restore_template(value) + return config +def _restore_template(value: str | Template): + value = value.template if isinstance(value, Template) else value + return cv.template(_replace_template_characters(value)) + def _replace_template_characters(template: str): template = template.replace("{!{", "{{").replace("}!}", "}}") From bf51d630d5a8ca8e2f3f6727fd2a56775a066371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Milan=20Jeremi=C4=87?= Date: Wed, 10 Jul 2024 12:10:52 +0000 Subject: [PATCH 31/32] removing debug log --- custom_components/multiscrape/service.py | 1 - 1 file changed, 1 deletion(-) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index dd421e9..5d47264 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -161,7 +161,6 @@ def _restore_templates(config): selectors.extend(form_variables) for selector in selectors: - _LOGGER.info("DEBUG selector: %s", selector) for attr_conf in selector.get(CONF_SENSOR_ATTRS) or []: attr_conf[CONF_VALUE_TEMPLATE] = ( _restore_template(attr_conf.get(CONF_VALUE_TEMPLATE)) From 
142f6440eaa0e4da924793b6c69534a8e7f80c5f Mon Sep 17 00:00:00 2001 From: Milan Jeremic Date: Thu, 11 Jul 2024 21:13:24 +0000 Subject: [PATCH 32/32] Removing unneeded local variable. --- custom_components/multiscrape/service.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index 5d47264..ffda235 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -157,8 +157,7 @@ def _restore_templates(config): for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: selectors.extend(config.get(platform) or []) if config.get(CONF_FORM_SUBMIT): - form_variables = config[CONF_FORM_SUBMIT].get(CONF_FORM_VARIABLES) or [] - selectors.extend(form_variables) + selectors.extend(config[CONF_FORM_SUBMIT].get(CONF_FORM_VARIABLES) or []) for selector in selectors: for attr_conf in selector.get(CONF_SENSOR_ATTRS) or []: