diff --git a/README.md b/README.md index 6ed0463..602aa73 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ Based on latest (pre) release. | authentication | Configure HTTP authentication. `basic` or `digest`. Use this with username and password fields. | False | | string | | username | The username for accessing the url. | False | | string | | password | The password for accessing the url. | False | | string | -| headers | The headers for the requests. | False | | template - list | +| headers | The headers for the requests. | False | | template - list | | params | The query params for the requests. | False | | template - list | | method | The method for the request. Either `POST` or `GET`. | False | GET | string | | payload | Optional payload to send with a POST request. | False | | string | @@ -111,10 +111,7 @@ Configure the sensors that will scrape the data. | ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | | unique_id | Will be used as entity_id and enables editing the entity in the UI | False | | string | | name | Friendly name for the sensor | False | | string | -| select | CSS selector used for retrieving the value of the sensor. Only required when `select_list` is not provided. | True | | string/template | -| select_list | CSS selector for multiple values of multiple elements which will be returned as csv. Only required when `select` is not provided. | True | | string/template | -| attribute | Attribute from the selected element to read as value | False | | string | -| value_template | Defines a template applied on the result of the selector to extract the value. 
For binary sensors, the sensor is on if the template evaluates as True | False | | string/template | +| | See [Selector](#Selector) fields | True | | | | attributes | See [Sensor attributes](#sensor-attributes) | False | | list | | unit_of_measurement | Defines the units of measurement of the sensor | False | | string | | device_class | Sets the device_class for [sensors](https://www.home-assistant.io/integrations/sensor/) or [binary sensors](https://www.home-assistant.io/integrations/binary_sensor/) | False | | string | @@ -122,7 +119,6 @@ Configure the sensors that will scrape the data. | icon | Defines the icon or a template for the icon of the sensor. The value of the selector (or value_template when given) is provided as input for the template. For binary sensors, the value is parsed in a boolean. | False | | string/template | | picture | Contains a path to a local image and will set it as entity picture | False | | string | | force_update | Sends update events even if the value hasn’t changed. Useful if you want to have meaningful value graphs in history. | False | False | boolean | -| on_error | See [On-error](#on-error) | False | | | ### Refresh button @@ -137,18 +133,14 @@ Configure a refresh button to manually trigger scraping. Configure the attributes on the sensor that can be set with additional scraping values. -| name | description | required | default | type | -| -------------- | --------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | -| name | Name of the attribute (will be slugified) | True | | string | -| select | CSS selector used for retrieving the value of the attribute. Only required when `select_list` is not provided. | True | | string/template | -| select_list | CSS selector for multiple values of multiple elements which will be returned as csv. Only required when `select` is not provided. 
| True | | string/template | -| attribute | Attribute from the selected element to read as value | False | | string | -| value_template | Defines a template applied on the result of the selector to extract the value | False | | string/template | -| on_error | See [On-error](#on-error) | False | | | +| name | description | required | default | type | +| ---- | ----------------------------------------- | -------- | ------- | ------ | +| name | Name of the attribute (will be slugified) | True | | string | +| | See [Selector](#Selector) fields | True | | | ### Form-submit -Configure the form-submit functionality which enables you to submit a (login) form before scraping a site. More details on how this works [can be found on the wiki.](https://github.com/danieldotnl/ha-multiscrape/wiki/Form-submit-functionality) +Configure the form-submit functionality which enables you to submit a (login) form before scraping a site. More details on how this works [can be found on the wiki](https://github.com/danieldotnl/ha-multiscrape/wiki/Form-submit-functionality). | name | description | required | default | type | | ----------------- | --------------------------------------------------------------------------------------------------------- | -------- | ------- | ------------------- | @@ -158,6 +150,47 @@ Configure the form-submit functionality which enables you to submit a (login) fo | input_filter | A list of input fields that should not be submitted with the form | False | | string - list | | submit_once | Submit the form only once on startup instead of each scan interval | False | False | boolean | | resubmit_on_error | Resubmit the form after a scraping error is encountered | False | True | boolean | +| variables | See [Form Variables](#Form-Variables) | False | | list | + +### Form Variables + +Configure the variables that will be scraped from the [`form_submit`](#form-submit) response. 
These variables can be used in the `value_template` of the main configuration of the current integration: a `selector` in sensors/attributes or in a `header`. A common use case is to populate the `X-Login-Token` header which is the result of the login. + +| name | description | required | default | type | +| ---- | -------------------------------- | -------- | ------- | ------ | +| name | Name of the variable | True | | string | +| | See [Selector](#Selector) fields | True | | | + +Example: + +```yaml +multiscrape: + - resource: "https://somesiteyouwanttoscrape.com" + form_submit: + submit_once: True + resource: "https://authforsomesiteyouwanttoscrape.com" + input: + email: "" + password: "" + variables: + - name: token + value_template: "{{ ... }}" + headers: + X-Login-Token: "{{ token }}" + sensor: ... +``` + +### Selector + +Used to configure scraping options. + +| name | description | required | default | type | +| -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------- | --------------- | +| select | CSS selector used for retrieving the value of the attribute. Only required when `select_list` or `value_template` is not provided. | False | | string/template | +| select_list | CSS selector for multiple values of multiple elements which will be returned as csv. Only required when `select` or `value_template` is not provided. | False | | string/template | +| attribute | Attribute from the selected element to read as value. | False | | string | +| value_template | Defines a template applied to extract the value from the result of the selector (if provided) or raw page (if selector not provided) | False | | string/template | +| on_error | See [On-error](#on-error) | False | | | ### On-error @@ -178,7 +211,7 @@ Multiscrape also offers a `get_content` and a `scrape` service. `get_content` re `scrape` does what it says. 
It scrapes a website and provides the sensors and attributes. Both services accept the same configuration as what you would provide in your configuration yaml (what is described above), with a small but important caveat: if the service input contains templates, those are automatically parsed by home assistant when the service is being called. That is fine for templates like `resource` and `select`, but templates that need to be applied on the scraped data itself (like `value_template`), cannot be parsed when the service is called. Therefore you need to slightly alter the syntax and add a `!` in the middle. E.g. `{{` becomes `{!{` and `%}` becomes `%!}`. Multiscrape will then understand that this string needs to handled as a template after the service has been called.\ -*If someone has a better solution, please let me know!* +_If someone has a better solution, please let me know!_ To call one of those services, go to 'Developer tools' in Home Assistant and then to 'services'. Find the `multiscrape.get_content` or `multiscrape.scrape` services and go to yaml mode. There you enter your configuration. 
Example: diff --git a/custom_components/multiscrape/__init__.py b/custom_components/multiscrape/__init__.py index 4e6292a..52da026 100644 --- a/custom_components/multiscrape/__init__.py +++ b/custom_components/multiscrape/__init__.py @@ -2,42 +2,29 @@ import asyncio import contextlib import logging -import os import voluptuous as vol from homeassistant.config_entries import ConfigEntry -from homeassistant.const import CONF_NAME - -from homeassistant.const import Platform -from homeassistant.const import SERVICE_RELOAD, CONF_RESOURCE, CONF_RESOURCE_TEMPLATE +from homeassistant.const import (CONF_NAME, CONF_RESOURCE, + CONF_RESOURCE_TEMPLATE, SERVICE_RELOAD, + Platform) from homeassistant.core import HomeAssistant - from homeassistant.exceptions import HomeAssistantError from homeassistant.helpers import discovery -from homeassistant.helpers.reload import async_integration_yaml_config -from homeassistant.helpers.reload import async_reload_integration_platforms -from homeassistant.util import slugify - -from .service import setup_config_services, setup_integration_services - -from .const import CONF_FORM_SUBMIT -from .const import CONF_LOG_RESPONSE -from .const import CONF_PARSER -from .const import COORDINATOR -from .const import DOMAIN -from .const import PLATFORM_IDX -from .const import SCRAPER -from .const import SCRAPER_DATA -from .const import SCRAPER_IDX -from .coordinator import ( - create_multiscrape_coordinator, -) -from .coordinator import create_content_request_manager -from .file import LoggingFileManager +from homeassistant.helpers.reload import (async_integration_yaml_config, + async_reload_integration_platforms) + +from .const import (CONF_FORM_SUBMIT, CONF_LOG_RESPONSE, CONF_PARSER, + COORDINATOR, DOMAIN, PLATFORM_IDX, SCRAPER, SCRAPER_DATA, + SCRAPER_IDX) +from .coordinator import (create_content_request_manager, + create_multiscrape_coordinator) +from .file import create_file_manager from .form import create_form_submitter from .http import 
create_http_wrapper from .schema import COMBINED_SCHEMA, CONFIG_SCHEMA # noqa: F401 from .scraper import create_scraper +from .service import setup_config_services, setup_integration_services _LOGGER = logging.getLogger(__name__) PLATFORMS = [Platform.SENSOR, Platform.BINARY_SENSOR, Platform.BUTTON] @@ -103,36 +90,24 @@ async def _async_process_config(hass: HomeAssistant, config) -> bool: "%s # Setting up multiscrape with config:\n %s", config_name, conf ) - file_manager = None - log_response = conf.get(CONF_LOG_RESPONSE) - if log_response: - folder = os.path.join( - hass.config.config_dir, f"multiscrape/{slugify(config_name)}/" - ) - _LOGGER.debug( - "%s # Log responses enabled, creating logging folder: %s", - config_name, - folder, - ) - file_manager = LoggingFileManager(folder) - await hass.async_add_executor_job(file_manager.create_folders) - - http = create_http_wrapper(config_name, conf, hass, file_manager) - + file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE)) form_submit_config = conf.get(CONF_FORM_SUBMIT) form_submitter = None if form_submit_config: - form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) parser = conf.get(CONF_PARSER) + form_http = create_http_wrapper(config_name, form_submit_config, hass, file_manager) form_submitter = create_form_submitter( - config_name, form_submit_config, hass, form_http, file_manager, parser + config_name, + form_submit_config, + hass, + form_http, + file_manager, + parser, ) + http = create_http_wrapper(config_name, conf, hass, file_manager) scraper = create_scraper(config_name, conf, hass, file_manager) - - request_manager = create_content_request_manager( - config_name, conf, hass, http, form_submitter - ) + request_manager = create_content_request_manager(config_name, conf, hass, http, form_submitter) coordinator = create_multiscrape_coordinator( config_name, conf, diff --git a/custom_components/multiscrape/binary_sensor.py 
b/custom_components/multiscrape/binary_sensor.py index 4e23016..1e414a3 100644 --- a/custom_components/multiscrape/binary_sensor.py +++ b/custom_components/multiscrape/binary_sensor.py @@ -4,27 +4,20 @@ import logging from homeassistant.components.binary_sensor import BinarySensorEntity -from homeassistant.const import CONF_DEVICE_CLASS -from homeassistant.const import CONF_FORCE_UPDATE -from homeassistant.const import CONF_ICON -from homeassistant.const import CONF_NAME -from homeassistant.const import CONF_UNIQUE_ID -from homeassistant.const import Platform +from homeassistant.const import (CONF_DEVICE_CLASS, CONF_FORCE_UPDATE, + CONF_ICON, CONF_NAME, CONF_UNIQUE_ID, + Platform) from homeassistant.core import HomeAssistant from homeassistant.exceptions import PlatformNotReady from homeassistant.helpers.entity import async_generate_entity_id from homeassistant.helpers.entity_platform import AddEntitiesCallback -from homeassistant.helpers.typing import ConfigType -from homeassistant.helpers.typing import DiscoveryInfoType +from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType from homeassistant.util import slugify from . 
import async_get_config_and_coordinator -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import CONF_PICTURE -from .const import CONF_SENSOR_ATTRS -from .const import LOG_LEVELS +from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, + CONF_ON_ERROR_VALUE_NONE, CONF_PICTURE, CONF_SENSOR_ATTRS, + LOG_LEVELS) from .entity import MultiscrapeEntity from .selector import Selector @@ -52,7 +45,8 @@ async def async_setup_platform( raise PlatformNotReady sensor_name = conf.get(CONF_NAME) - _LOGGER.debug("%s # %s # Setting up binary sensor", scraper.name, sensor_name) + _LOGGER.debug("%s # %s # Setting up binary sensor", + scraper.name, sensor_name) unique_id = conf.get(CONF_UNIQUE_ID) device_class = conf.get(CONF_DEVICE_CLASS) force_update = conf.get(CONF_FORCE_UPDATE) @@ -129,9 +123,11 @@ def _update_sensor(self): try: if self.coordinator.update_error is True: - raise ValueError("Skipped scraping because data couldn't be updated") + raise ValueError( + "Skipped scraping because data couldn't be updated") - value = self.scraper.scrape(self._sensor_selector, self._name) + value = self.scraper.scrape( + self._sensor_selector, self._name, variables=self.coordinator.form_variables) try: self._attr_is_on = bool(int(value)) except ValueError: diff --git a/custom_components/multiscrape/const.py b/custom_components/multiscrape/const.py index 87502d2..ef87737 100644 --- a/custom_components/multiscrape/const.py +++ b/custom_components/multiscrape/const.py @@ -31,6 +31,7 @@ CONF_FORM_INPUT_FILTER = "input_filter" CONF_FORM_SUBMIT_ONCE = "submit_once" CONF_FORM_RESUBMIT_ERROR = "resubmit_on_error" +CONF_FORM_VARIABLES = "variables" CONF_LOG_RESPONSE = "log_response" DEFAULT_PARSER = "lxml" diff --git a/custom_components/multiscrape/coordinator.py b/custom_components/multiscrape/coordinator.py index 4f3008a..4e4138a 100644 --- 
a/custom_components/multiscrape/coordinator.py +++ b/custom_components/multiscrape/coordinator.py @@ -1,26 +1,22 @@ """Coordinator class for multiscrape integration.""" import logging -from datetime import timedelta from collections.abc import Callable +from datetime import timedelta +from homeassistant.const import (CONF_RESOURCE, CONF_RESOURCE_TEMPLATE, + CONF_SCAN_INTERVAL) from homeassistant.core import HomeAssistant -from homeassistant.const import ( - CONF_RESOURCE, - CONF_RESOURCE_TEMPLATE, - CONF_SCAN_INTERVAL, -) -from homeassistant.helpers.update_coordinator import DataUpdateCoordinator -from homeassistant.helpers.update_coordinator import event +from homeassistant.helpers.update_coordinator import (DataUpdateCoordinator, + event) from homeassistant.util.dt import utcnow -from .scraper import Scraper -from .http import HttpWrapper +from .const import DOMAIN from .file import LoggingFileManager from .form import FormSubmitter +from .http import HttpWrapper +from .scraper import Scraper from .util import create_renderer -from .const import DOMAIN - _LOGGER = logging.getLogger(__name__) # we don't want to go with the default 15 seconds defined in helpers/entity_component DEFAULT_SCAN_INTERVAL = timedelta(seconds=60) @@ -56,6 +52,7 @@ def __init__( self._http = http self._form_submitter = form self._resource_renderer = resource_renderer + self._form_variables = {} def notify_scrape_exception(self): """Notify the form_submitter of an exception so it will re-submit next trigger.""" @@ -69,6 +66,7 @@ async def get_content(self) -> str: if self._form_submitter: try: result = await self._form_submitter.async_submit(resource) + self._form_variables = self._form_submitter.scrape_variables() if result: _LOGGER.debug( @@ -83,9 +81,14 @@ async def get_content(self) -> str: ex, ) - response = await self._http.async_request("page", resource) + response = await self._http.async_request("page", resource, variables=self._form_variables) return response.text + @property + 
def form_variables(self): + """Return the form variables.""" + return self._form_variables + def create_multiscrape_coordinator( config_name, conf, hass, request_manager, file_manager, scraper @@ -203,3 +206,8 @@ async def _prepare_new_run(self): ) self._scraper.reset() + + @property + def form_variables(self): + """Return the form variables.""" + return self._request_manager.form_variables diff --git a/custom_components/multiscrape/entity.py b/custom_components/multiscrape/entity.py index e7624ba..0b5f4d6 100644 --- a/custom_components/multiscrape/entity.py +++ b/custom_components/multiscrape/entity.py @@ -8,10 +8,8 @@ from homeassistant.helpers.entity import Entity from homeassistant.helpers.update_coordinator import DataUpdateCoordinator -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import LOG_LEVELS +from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, + CONF_ON_ERROR_VALUE_NONE, LOG_LEVELS) from .scraper import Scraper _LOGGER = logging.getLogger(__name__) @@ -92,7 +90,8 @@ async def async_added_to_hass(self) -> None: ) if self.coordinator: self.async_on_remove( - self.coordinator.async_add_listener(self._handle_coordinator_update) + self.coordinator.async_add_listener( + self._handle_coordinator_update) ) @callback @@ -133,7 +132,8 @@ def _update_attributes(self): ) for name, attr_selector in self._attribute_selectors.items(): try: - attr_value = self.scraper.scrape(attr_selector, self._name, name) + attr_value = self.scraper.scrape( + attr_selector, self._name, name, variables=self.coordinator.form_variables) self._attr_extra_state_attributes[name] = attr_value except Exception as exception: _LOGGER.debug( diff --git a/custom_components/multiscrape/file.py b/custom_components/multiscrape/file.py index 95200ad..bf09037 100644 --- a/custom_components/multiscrape/file.py +++ b/custom_components/multiscrape/file.py @@ -1,6 +1,27 @@ 
"""LoggingFileManager for file utilities.""" +import logging import os +from homeassistant.core import HomeAssistant +from homeassistant.util import slugify + +_LOGGER = logging.getLogger(__name__) + +async def create_file_manager(hass: HomeAssistant, config_name: str, log_response: bool): + """Create a file manager instance.""" + file_manager = None + if log_response: + folder = os.path.join( + hass.config.config_dir, f"multiscrape/{slugify(config_name)}/" + ) + _LOGGER.debug( + "%s # Log responses enabled, creating logging folder: %s", + config_name, + folder, + ) + file_manager = LoggingFileManager(folder) + await hass.async_add_executor_job(file_manager.create_folders) + return file_manager class LoggingFileManager: """LoggingFileManager for handling logging files.""" diff --git a/custom_components/multiscrape/form.py b/custom_components/multiscrape/form.py index 54e9a59..a53481b 100644 --- a/custom_components/multiscrape/form.py +++ b/custom_components/multiscrape/form.py @@ -3,20 +3,17 @@ from urllib.parse import urljoin from bs4 import BeautifulSoup - +from homeassistant.const import CONF_NAME, CONF_RESOURCE from homeassistant.core import HomeAssistant -from homeassistant.const import CONF_RESOURCE - -from .const import ( - CONF_FORM_SELECT, - CONF_FORM_INPUT, - CONF_FORM_INPUT_FILTER, - CONF_FORM_SUBMIT_ONCE, - CONF_FORM_RESUBMIT_ERROR, -) + +from custom_components.multiscrape.scraper import create_scraper + +from .const import (CONF_FORM_INPUT, CONF_FORM_INPUT_FILTER, + CONF_FORM_RESUBMIT_ERROR, CONF_FORM_SELECT, + CONF_FORM_SUBMIT_ONCE, CONF_FORM_VARIABLES) from .file import LoggingFileManager from .http import HttpWrapper - +from .selector import Selector _LOGGER = logging.getLogger(__name__) @@ -30,6 +27,14 @@ def create_form_submitter(config_name, config, hass, http, file_manager, parser) resubmit_error = config.get(CONF_FORM_RESUBMIT_ERROR) submit_once = config.get(CONF_FORM_SUBMIT_ONCE) + scraper = None + variables_selectors = {} + variables = 
config.get(CONF_FORM_VARIABLES) + if variables: + scraper = create_scraper(config_name, config, hass, file_manager) + for variables_conf in variables: + variables_selectors[variables_conf.get(CONF_NAME)] = Selector(hass, variables_conf) + return FormSubmitter( config_name, hass, @@ -41,6 +46,8 @@ def create_form_submitter(config_name, config, hass, http, file_manager, parser) input_filter, submit_once, resubmit_error, + variables_selectors, + scraper, parser, ) @@ -60,6 +67,8 @@ def __init__( input_filter, submit_once, resubmit_error, + variables_selectors, + scraper, parser, ): """Initialize FormSubmitter class.""" @@ -74,6 +83,8 @@ def __init__( self._input_filter = input_filter self._submit_once = submit_once self._resubmit_error = resubmit_error + self._variables_selectors = variables_selectors + self._scraper = scraper self._parser = parser self._should_submit = True @@ -89,7 +100,7 @@ def notify_scrape_exception(self): async def async_submit(self, main_resource): """Submit the form.""" if not self._should_submit: - _LOGGER.debug("%s # Skip submitting form") + _LOGGER.debug("%s # Skip submitting form", self._config_name) return _LOGGER.debug("%s # Starting with form-submit", self._config_name) @@ -150,11 +161,21 @@ async def async_submit(self, main_resource): if self._submit_once: self._should_submit = False + if self._scraper: + await self._scraper.set_content(response.text) + if not self._form_resource: return response.text else: return None + def scrape_variables(self): + """Scrape the configured form variables from the form-submit response.""" + result = {} + for variable_key in self._variables_selectors: + result[variable_key] = self._scraper.scrape(self._variables_selectors[variable_key], variable_key) + return result + def _determine_submit_resource(self, action, main_resource): resource = main_resource if action and self._form_resource: diff --git a/custom_components/multiscrape/http.py b/custom_components/multiscrape/http.py index efe3005..6ed772c 100644 --- 
a/custom_components/multiscrape/http.py +++ b/custom_components/multiscrape/http.py @@ -1,21 +1,14 @@ """HTTP request related functionality.""" import logging from collections.abc import Callable -import httpx +import httpx +from homeassistant.const import (CONF_AUTHENTICATION, CONF_HEADERS, + CONF_METHOD, CONF_PARAMS, CONF_PASSWORD, + CONF_PAYLOAD, CONF_TIMEOUT, CONF_USERNAME, + CONF_VERIFY_SSL, HTTP_DIGEST_AUTHENTICATION) from homeassistant.helpers.httpx_client import get_async_client -from homeassistant.const import ( - HTTP_DIGEST_AUTHENTICATION, - CONF_VERIFY_SSL, - CONF_USERNAME, - CONF_PASSWORD, - CONF_AUTHENTICATION, - CONF_TIMEOUT, - CONF_HEADERS, - CONF_PARAMS, - CONF_PAYLOAD, - CONF_METHOD, -) + from .util import create_dict_renderer, create_renderer _LOGGER = logging.getLogger(__name__) @@ -84,14 +77,15 @@ def set_authentication(self, username, password, auth_type): self._auth = httpx.DigestAuth(username, password) else: self._auth = (username, password) - _LOGGER.debug("%s # Authentication configuration processed", self._config_name) + _LOGGER.debug( + "%s # Authentication configuration processed", self._config_name) - async def async_request(self, context, resource, method=None, request_data=None): + async def async_request(self, context, resource, method=None, request_data=None, variables: dict = {}): """Execute a HTTP request.""" - data = request_data or self._data_renderer() + data = request_data or self._data_renderer(variables) method = method or self._method or "GET" - headers = self._headers_renderer(None) - params = self._params_renderer(None) + headers = self._headers_renderer(variables) + params = self._params_renderer(variables) _LOGGER.debug( "%s # Executing %s-request with a %s to url: %s with headers: %s.", diff --git a/custom_components/multiscrape/schema.py b/custom_components/multiscrape/schema.py index fe8f01e..0263d97 100644 --- a/custom_components/multiscrape/schema.py +++ b/custom_components/multiscrape/schema.py @@ -1,75 +1,42 @@ 
"""The multiscrape component schemas.""" +import logging + import homeassistant.helpers.config_validation as cv import voluptuous as vol -import logging -from homeassistant.components.binary_sensor import ( - DEVICE_CLASSES_SCHEMA as BINARY_SENSOR_DEVICE_CLASSES_SCHEMA, -) -from homeassistant.components.binary_sensor import DOMAIN as BINARY_SENSOR_DOMAIN +from homeassistant.components.binary_sensor import \ + DEVICE_CLASSES_SCHEMA as BINARY_SENSOR_DEVICE_CLASSES_SCHEMA +from homeassistant.components.binary_sensor import \ + DOMAIN as BINARY_SENSOR_DOMAIN from homeassistant.components.button import DOMAIN as BUTTON_DOMAIN -from homeassistant.components.sensor import ( - DEVICE_CLASSES_SCHEMA as SENSOR_DEVICE_CLASSES_SCHEMA, -) +from homeassistant.components.sensor import \ + DEVICE_CLASSES_SCHEMA as SENSOR_DEVICE_CLASSES_SCHEMA from homeassistant.components.sensor import DOMAIN as SENSOR_DOMAIN -from homeassistant.components.sensor import ( - STATE_CLASSES_SCHEMA as SENSOR_STATE_CLASSES_SCHEMA, -) -from homeassistant.const import CONF_AUTHENTICATION -from homeassistant.const import CONF_DEVICE_CLASS -from homeassistant.const import CONF_FORCE_UPDATE -from homeassistant.const import CONF_HEADERS -from homeassistant.const import CONF_ICON -from homeassistant.const import CONF_METHOD -from homeassistant.const import CONF_NAME -from homeassistant.const import CONF_PARAMS -from homeassistant.const import CONF_PASSWORD -from homeassistant.const import CONF_PAYLOAD -from homeassistant.const import CONF_RESOURCE -from homeassistant.const import CONF_RESOURCE_TEMPLATE -from homeassistant.const import CONF_SCAN_INTERVAL -from homeassistant.const import CONF_TIMEOUT -from homeassistant.const import CONF_UNIQUE_ID -from homeassistant.const import CONF_UNIT_OF_MEASUREMENT -from homeassistant.const import CONF_USERNAME -from homeassistant.const import CONF_VALUE_TEMPLATE -from homeassistant.const import CONF_VERIFY_SSL -from homeassistant.const import HTTP_BASIC_AUTHENTICATION 
-from homeassistant.const import HTTP_DIGEST_AUTHENTICATION - -from .const import CONF_ATTR -from .const import CONF_FORM_INPUT -from .const import CONF_FORM_INPUT_FILTER -from .const import CONF_FORM_RESUBMIT_ERROR -from .const import CONF_FORM_SELECT -from .const import CONF_FORM_SUBMIT -from .const import CONF_FORM_SUBMIT_ONCE -from .const import CONF_LOG_RESPONSE -from .const import CONF_ON_ERROR -from .const import CONF_ON_ERROR_DEFAULT -from .const import CONF_ON_ERROR_LOG -from .const import CONF_ON_ERROR_VALUE -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import CONF_PARSER -from .const import CONF_PICTURE -from .const import CONF_SELECT -from .const import CONF_SELECT_LIST -from .const import CONF_SENSOR_ATTRS -from .const import CONF_SEPARATOR -from .const import CONF_STATE_CLASS -from .const import DEFAULT_BINARY_SENSOR_NAME -from .const import DEFAULT_BUTTON_NAME -from .const import DEFAULT_FORCE_UPDATE -from .const import DEFAULT_METHOD -from .const import DEFAULT_PARSER -from .const import DEFAULT_SENSOR_NAME -from .const import DEFAULT_SEPARATOR -from .const import DEFAULT_VERIFY_SSL -from .const import DOMAIN -from .const import LOG_ERROR -from .const import LOG_LEVELS -from .const import METHODS +from homeassistant.components.sensor import \ + STATE_CLASSES_SCHEMA as SENSOR_STATE_CLASSES_SCHEMA +from homeassistant.const import (CONF_AUTHENTICATION, CONF_DEVICE_CLASS, + CONF_FORCE_UPDATE, CONF_HEADERS, CONF_ICON, + CONF_METHOD, CONF_NAME, CONF_PARAMS, + CONF_PASSWORD, CONF_PAYLOAD, CONF_RESOURCE, + CONF_RESOURCE_TEMPLATE, CONF_SCAN_INTERVAL, + CONF_TIMEOUT, CONF_UNIQUE_ID, + CONF_UNIT_OF_MEASUREMENT, CONF_USERNAME, + CONF_VALUE_TEMPLATE, CONF_VERIFY_SSL, + HTTP_BASIC_AUTHENTICATION, + HTTP_DIGEST_AUTHENTICATION) + +from .const import (CONF_ATTR, CONF_FORM_INPUT, CONF_FORM_INPUT_FILTER, + CONF_FORM_RESUBMIT_ERROR, CONF_FORM_SELECT, + 
CONF_FORM_SUBMIT, CONF_FORM_SUBMIT_ONCE, + CONF_FORM_VARIABLES, CONF_LOG_RESPONSE, CONF_ON_ERROR, + CONF_ON_ERROR_DEFAULT, CONF_ON_ERROR_LOG, + CONF_ON_ERROR_VALUE, CONF_ON_ERROR_VALUE_DEFAULT, + CONF_ON_ERROR_VALUE_LAST, CONF_ON_ERROR_VALUE_NONE, + CONF_PARSER, CONF_PICTURE, CONF_SELECT, CONF_SELECT_LIST, + CONF_SENSOR_ATTRS, CONF_SEPARATOR, CONF_STATE_CLASS, + DEFAULT_BINARY_SENSOR_NAME, DEFAULT_BUTTON_NAME, + DEFAULT_FORCE_UPDATE, DEFAULT_METHOD, DEFAULT_PARSER, + DEFAULT_SENSOR_NAME, DEFAULT_SEPARATOR, DEFAULT_VERIFY_SSL, + DOMAIN, LOG_ERROR, LOG_LEVELS, METHODS) from .scraper import DEFAULT_TIMEOUT _LOGGER = logging.getLogger(__name__) @@ -90,15 +57,6 @@ vol.Optional(CONF_TIMEOUT, default=DEFAULT_TIMEOUT): cv.positive_int, } -FORM_SUBMIT_SCHEMA = { - **HTTP_SCHEMA, - vol.Optional(CONF_FORM_SELECT): cv.string, - vol.Optional(CONF_FORM_INPUT): vol.Schema({cv.string: cv.string}), - vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, - vol.Optional(CONF_FORM_SUBMIT_ONCE, default=False): cv.boolean, - vol.Optional(CONF_FORM_RESUBMIT_ERROR, default=True): cv.boolean, -} - INTEGRATION_SCHEMA = { **HTTP_SCHEMA, vol.Optional(CONF_PARSER, default=DEFAULT_PARSER): cv.string, @@ -128,6 +86,20 @@ vol.Optional(CONF_ON_ERROR): vol.Schema(ON_ERROR_SCHEMA), } +FORM_HEADERS_MAPPING_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} + +FORM_SUBMIT_SCHEMA = { + **HTTP_SCHEMA, + vol.Optional(CONF_FORM_SELECT): cv.string, + vol.Optional(CONF_FORM_INPUT): vol.Schema({cv.string: cv.string}), + vol.Optional(CONF_FORM_INPUT_FILTER, default=[]): cv.ensure_list, + vol.Optional(CONF_FORM_SUBMIT_ONCE, default=False): cv.boolean, + vol.Optional(CONF_FORM_RESUBMIT_ERROR, default=True): cv.boolean, + vol.Optional(CONF_FORM_VARIABLES, default=[]): vol.All( + cv.ensure_list, [vol.Schema(FORM_HEADERS_MAPPING_SCHEMA)] + ), +} + SENSOR_ATTRIBUTE_SCHEMA = {vol.Required(CONF_NAME): cv.string, **SELECTOR_SCHEMA} SENSOR_SCHEMA = { diff --git 
a/custom_components/multiscrape/scraper.py b/custom_components/multiscrape/scraper.py index 1517c87..2f37aef 100644 --- a/custom_components/multiscrape/scraper.py +++ b/custom_components/multiscrape/scraper.py @@ -95,7 +95,7 @@ async def set_content(self, content): ) raise - def scrape(self, selector, sensor, attribute=None): + def scrape(self, selector, sensor, attribute=None, variables: dict = {}): """Scrape based on given selector the data.""" # This is required as this function is called separately for sensors and attributes log_prefix = f"{self._config_name} # {sensor}" @@ -105,7 +105,7 @@ def scrape(self, selector, sensor, attribute=None): if selector.just_value: _LOGGER.debug("%s # Applying value_template only.", log_prefix) result = selector.value_template.async_render_with_possible_json_value( - self._data, None + self._data, None, variables=variables ) return selector.value_template._parse_result(result) @@ -116,7 +116,8 @@ def scrape(self, selector, sensor, attribute=None): if selector.is_list: tags = self._soup.select(selector.list) - _LOGGER.debug("%s # List selector selected tags: %s", log_prefix, tags) + _LOGGER.debug("%s # List selector selected tags: %s", + log_prefix, tags) if selector.attribute is not None: _LOGGER.debug( "%s # Try to find attributes: %s", @@ -148,13 +149,15 @@ def scrape(self, selector, sensor, attribute=None): _LOGGER.debug("%s # Selector result: %s", log_prefix, value) if value is not None and selector.value_template is not None: - _LOGGER.debug("%s # Applying value_template on selector result", log_prefix) - value = selector.value_template.async_render( - variables={"value": value}, parse_result=True + _LOGGER.debug( + "%s # Applying value_template on selector result", log_prefix) + variables = {**variables, "value": value} + value = selector.value_template.async_render(variables=variables, parse_result=True ) _LOGGER.debug( - "%s # Final selector value: %s of type %s", log_prefix, value, type(value) + "%s # Final selector value: %s of type 
%s", log_prefix, value, type( + value) ) return value diff --git a/custom_components/multiscrape/sensor.py b/custom_components/multiscrape/sensor.py index dfe5ac1..3a58352 100644 --- a/custom_components/multiscrape/sensor.py +++ b/custom_components/multiscrape/sensor.py @@ -3,32 +3,22 @@ import logging -from homeassistant.components.sensor import SensorDeviceClass -from homeassistant.components.sensor import SensorEntity +from homeassistant.components.sensor import SensorDeviceClass, SensorEntity from homeassistant.components.sensor.helpers import async_parse_date_datetime -from homeassistant.const import CONF_DEVICE_CLASS -from homeassistant.const import CONF_FORCE_UPDATE -from homeassistant.const import CONF_ICON -from homeassistant.const import CONF_NAME -from homeassistant.const import CONF_UNIQUE_ID -from homeassistant.const import CONF_UNIT_OF_MEASUREMENT -from homeassistant.const import Platform +from homeassistant.const import (CONF_DEVICE_CLASS, CONF_FORCE_UPDATE, + CONF_ICON, CONF_NAME, CONF_UNIQUE_ID, + CONF_UNIT_OF_MEASUREMENT, Platform) from homeassistant.core import HomeAssistant from homeassistant.exceptions import PlatformNotReady from homeassistant.helpers.entity import async_generate_entity_id from homeassistant.helpers.entity_platform import AddEntitiesCallback -from homeassistant.helpers.typing import ConfigType -from homeassistant.helpers.typing import DiscoveryInfoType +from homeassistant.helpers.typing import ConfigType, DiscoveryInfoType from homeassistant.util import slugify from . 
import async_get_config_and_coordinator -from .const import CONF_ON_ERROR_VALUE_DEFAULT -from .const import CONF_ON_ERROR_VALUE_LAST -from .const import CONF_ON_ERROR_VALUE_NONE -from .const import CONF_PICTURE -from .const import CONF_SENSOR_ATTRS -from .const import CONF_STATE_CLASS -from .const import LOG_LEVELS +from .const import (CONF_ON_ERROR_VALUE_DEFAULT, CONF_ON_ERROR_VALUE_LAST, + CONF_ON_ERROR_VALUE_NONE, CONF_PICTURE, CONF_SENSOR_ATTRS, + CONF_STATE_CLASS, LOG_LEVELS) from .entity import MultiscrapeEntity from .selector import Selector @@ -142,9 +132,11 @@ def _update_sensor(self): try: if self.coordinator.update_error is True: - raise ValueError("Skipped scraping because data couldn't be updated") + raise ValueError( + "Skipped scraping because data couldn't be updated") - value = self.scraper.scrape(self._sensor_selector, self._name) + value = self.scraper.scrape( + self._sensor_selector, self._name, variables=self.coordinator.form_variables) _LOGGER.debug( "%s # %s # Selected: %s", self.scraper.name, self._name, value ) diff --git a/custom_components/multiscrape/service.py b/custom_components/multiscrape/service.py index b1f0619..ffda235 100644 --- a/custom_components/multiscrape/service.py +++ b/custom_components/multiscrape/service.py @@ -1,40 +1,27 @@ """Class for implementing the multiscrape services.""" import logging -import voluptuous as vol + import homeassistant.helpers.config_validation as cv -from homeassistant.core import HomeAssistant -from homeassistant.core import ServiceCall, SupportsResponse -from homeassistant.const import ( - CONF_NAME, - CONF_DESCRIPTION, - CONF_UNIQUE_ID, - CONF_VALUE_TEMPLATE, - CONF_ICON, -) +import voluptuous as vol +from homeassistant.const import (CONF_DESCRIPTION, CONF_HEADERS, CONF_ICON, + CONF_NAME, CONF_UNIQUE_ID, + CONF_VALUE_TEMPLATE, Platform) +from homeassistant.core import HomeAssistant, ServiceCall, SupportsResponse from homeassistant.helpers.service import async_set_service_schema +from 
homeassistant.helpers.template import Template from homeassistant.util import slugify -from homeassistant.const import Platform - -from .scraper import create_scraper +from .const import (CONF_FIELDS, CONF_FORM_SUBMIT, CONF_FORM_VARIABLES, + CONF_LOG_RESPONSE, CONF_PARSER, CONF_SENSOR_ATTRS, DOMAIN) +from .coordinator import (MultiscrapeDataUpdateCoordinator, + create_content_request_manager) +from .file import create_file_manager from .form import create_form_submitter - -from .selector import Selector -from .schema import SERVICE_COMBINED_SCHEMA -from .coordinator import ( - MultiscrapeDataUpdateCoordinator, - create_content_request_manager, -) from .http import create_http_wrapper - -from .const import ( - CONF_FORM_SUBMIT, - CONF_PARSER, - CONF_SENSOR_ATTRS, - DOMAIN, - CONF_FIELDS, -) +from .schema import SERVICE_COMBINED_SCHEMA +from .scraper import create_scraper +from .selector import Selector _LOGGER = logging.getLogger(__name__) @@ -146,44 +133,54 @@ async def _async_scrape_service(service: ServiceCall) -> None: async def _prepare_service_request(hass: HomeAssistant, conf, config_name): - http = create_http_wrapper(config_name, conf, hass, None) + file_manager = await create_file_manager(hass, config_name, conf.get(CONF_LOG_RESPONSE)) + http = create_http_wrapper(config_name, conf, hass, file_manager) form_submitter = None form_submit_config = conf.get(CONF_FORM_SUBMIT) parser = conf.get(CONF_PARSER) if form_submit_config: - form_http = create_http_wrapper(config_name, form_submit_config, hass, None) + form_http = create_http_wrapper( + config_name, form_submit_config, hass, file_manager) form_submitter = create_form_submitter( - config_name, form_submit_config, hass, form_http, None, parser + config_name, form_submit_config, hass, form_http, file_manager, parser ) request_manager = create_content_request_manager( config_name, conf, hass, http, form_submitter ) - scraper = create_scraper(config_name, conf, hass, None) + scraper = 
create_scraper(config_name, conf, hass, file_manager) return request_manager, scraper def _restore_templates(config): config = dict(config) + selectors = [] for platform in [Platform.SENSOR, Platform.BINARY_SENSOR]: - for sensor in config.get(platform) or []: - for attr_conf in sensor.get(CONF_SENSOR_ATTRS) or []: - attr_conf[CONF_VALUE_TEMPLATE] = ( - cv.template( - _replace_template_characters(attr_conf.get(CONF_VALUE_TEMPLATE)) - ) - if attr_conf.get(CONF_VALUE_TEMPLATE) - else None - ) - if sensor.get(CONF_ICON): - sensor[CONF_ICON] = cv.template( - _replace_template_characters(sensor.get(CONF_ICON)) - ) - if sensor.get(CONF_VALUE_TEMPLATE): - sensor[CONF_VALUE_TEMPLATE] = cv.template( - _replace_template_characters(sensor.get(CONF_VALUE_TEMPLATE)) - ) + selectors.extend(config.get(platform) or []) + if config.get(CONF_FORM_SUBMIT): + selectors.extend(config[CONF_FORM_SUBMIT].get(CONF_FORM_VARIABLES) or []) + + for selector in selectors: + for attr_conf in selector.get(CONF_SENSOR_ATTRS) or []: + attr_conf[CONF_VALUE_TEMPLATE] = ( + _restore_template(attr_conf.get(CONF_VALUE_TEMPLATE)) + if attr_conf.get(CONF_VALUE_TEMPLATE) + else None + ) + if selector.get(CONF_ICON): + selector[CONF_ICON] = _restore_template(selector.get(CONF_ICON)) + if selector.get(CONF_VALUE_TEMPLATE): + selector[CONF_VALUE_TEMPLATE] = _restore_template(selector[CONF_VALUE_TEMPLATE]) + + headers = config.get(CONF_HEADERS) or {} + for key, value in headers.items(): + headers[key] = _restore_template(value) + return config +def _restore_template(value: str | Template): + value = value.template if isinstance(value, Template) else value + return cv.template(_replace_template_characters(value)) + def _replace_template_characters(template: str): template = template.replace("{!{", "{{").replace("}!}", "}}") diff --git a/custom_components/multiscrape/util.py b/custom_components/multiscrape/util.py index 5df0f3c..f20cd27 100644 --- a/custom_components/multiscrape/util.py +++ 
# custom_components/multiscrape/util.py
"""Some utility functions."""
import logging

from homeassistant.exceptions import TemplateError
from homeassistant.helpers.template import Template

_LOGGER: logging.Logger = logging.getLogger(__name__)


def create_renderer(hass, value_template):
    """Create a template renderer based on value_template.

    Args:
        hass: Object handed to the ``Template`` constructor, or assigned to
            the ``hass`` attribute of an already-built ``Template``.
        value_template: ``None``, a ``Template`` instance, or any other value
            that is passed to ``Template(value_template, hass)``.

    Returns:
        A callable ``render(variables=None, parse_result=False)``. When
        ``value_template`` is ``None`` the callable ignores its arguments and
        returns ``None``; otherwise it returns the result of
        ``value_template.async_render(variables, parse_result)``.

    Raises:
        TemplateError: Raised from the returned callable; the error is logged
            and then re-raised unchanged.
    """
    if value_template is None:

        def _render_nothing(variables: dict | None = None, parse_result=None):
            # No template configured: there is nothing to render.
            return None

        return _render_nothing

    if isinstance(value_template, Template):
        value_template.hass = hass
    else:
        value_template = Template(value_template, hass)

    def _render(variables: dict | None = None, parse_result=False):
        # Build a fresh dict per call instead of sharing one mutable default
        # object between every invocation of every renderer.
        if variables is None:
            variables = {}
        try:
            return value_template.async_render(variables, parse_result)
        except TemplateError:
            _LOGGER.exception(
                "Error rendering template: %s with variables %s", value_template, variables
            )
            raise

    return _render


def create_dict_renderer(hass, templates_dict):
    """Create template renderers for a dictionary with value_templates.

    Args:
        hass: Forwarded to ``create_renderer`` for every entry.
        templates_dict: ``None`` or a mapping of key -> value_template. The
            mapping itself is not modified.

    Returns:
        A callable ``render(variables=None, parse_result=False)`` returning a
        new dict that maps each key of ``templates_dict`` to the rendered
        result of its template. When ``templates_dict`` is ``None`` the
        callable returns a new empty dict.
    """
    if templates_dict is None:

        def _render_empty(variables: dict | None = None, parse_result=None):
            return {}

        return _render_empty

    # Build a separate key -> renderer mapping so the caller's dict is left
    # untouched.
    renderers = {
        key: create_renderer(hass, template)
        for key, template in templates_dict.items()
    }

    def _render(variables: dict | None = None, parse_result=False):
        # Normalise once so every per-key renderer receives the same dict.
        if variables is None:
            variables = {}
        return {key: render(variables, parse_result) for key, render in renderers.items()}

    return _render