diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..8e9a28a
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+extend-ignore = E131, E722, F401, F403
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index af76db2..819b132 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -48,11 +48,11 @@ jobs:
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.
-
+
# Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality
-
+
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
@@ -61,7 +61,7 @@ jobs:
# ℹ️ Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
- # If the Autobuild fails above, remove it and uncomment the following three lines.
+ # If the Autobuild fails above, remove it and uncomment the following three lines.
# modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
# - run: |
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..5cce972
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,32 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.4.0
+ hooks:
+ - id: check-added-large-files
+ - id: check-executables-have-shebangs
+ - id: check-shebang-scripts-are-executable
+ - id: end-of-file-fixer
+ - id: trailing-whitespace
+- repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.4.0
+ hooks:
+ - id: mypy
+ additional_dependencies:
+ - types-requests
+- repo: https://github.com/pylint-dev/pylint
+ rev: v3.0.0a6
+ hooks:
+ - id: pylint
+ additional_dependencies:
+ - flask
+ - Flask-SQLAlchemy
+ - requests
+ - beautifulsoup4
+- repo: https://github.com/PyCQA/flake8
+ rev: 6.0.0
+ hooks:
+ - id: flake8
+ additional_dependencies:
+ - flake8-import-order
diff --git a/.pylintrc b/.pylintrc
index 9152531..f8b632c 100644
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,8 +1,8 @@
[GENERAL]
-fail-under=9.0
+fail-under=10.0
[MESSAGES CONTROL]
-disable=R0903
+disable=C,R,W0511,W0702,W0718
[TYPECHECK]
generated-members=flask_sqlalchemy.SQLAlchemy.DateTime,
diff --git a/argostime.example.conf b/argostime.example.conf
index 914413b..e8b4ef7 100644
--- a/argostime.example.conf
+++ b/argostime.example.conf
@@ -6,4 +6,3 @@ user = argostime_user
password = p@ssw0rd
server = localhost
database = argostime
-
diff --git a/argostime/__init__.py b/argostime/__init__.py
index 2cbbfc1..3c60c79 100644
--- a/argostime/__init__.py
+++ b/argostime/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
__init__.py
@@ -25,28 +24,32 @@
logging.basicConfig(
filename="argostime.log",
level=logging.DEBUG,
- format="%(asctime)s - %(processName)s - %(levelname)s - %(module)s - %(funcName)s - %(message)s"
+ format="%(asctime)s - %(processName)s - %(levelname)s - %(module)s - "
+ "%(funcName)s - %(message)s"
)
-import os.path
+import configparser # noqa: I100, I202, E402
+import os.path # noqa: E402
-import configparser
+from flask import Flask # noqa: E402
-from flask import Flask
-from flask_sqlalchemy import SQLAlchemy
+from flask_sqlalchemy import SQLAlchemy # noqa: E402
db: SQLAlchemy = SQLAlchemy()
+
def get_current_commit() -> str:
"""Return the hexadecimal hash of the current running commit."""
- git_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.git"))
- with open(os.path.join(git_path, "HEAD"), "r") as file_head:
+ gp = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.git"))
+ with open(os.path.join(gp, "HEAD"), "r", encoding="utf-8") as file_head:
hexsha = file_head.read().strip()
while hexsha.startswith("ref: "):
- with open(os.path.join(git_path, hexsha[5:])) as file_ref:
+ with open(os.path.join(gp, hexsha[5:]),
+ "r", encoding="utf-8") as file_ref:
hexsha = file_ref.read().strip()
return hexsha
+
def create_app():
"""Return a flask object for argostime, initialize logger and db."""
logging.getLogger("matplotlib.font_manager").disabled = True
@@ -58,7 +61,9 @@ def create_app():
logging.debug("Found sections %s in config", config.sections())
if "mariadb" in config:
- app.config["SQLALCHEMY_DATABASE_URI"] = "mysql+pymysql://{user}:{password}@{server}/{database}?charset=utf8mb4".format(
+ app.config["SQLALCHEMY_DATABASE_URI"] = \
+ "mysql+pymysql://{user}:{password}@{server}/{database}" \
+ "?charset=utf8mb4".format(
user=config["mariadb"]["user"],
password=config["mariadb"]["password"],
server=config["mariadb"]["server"],
@@ -74,6 +79,6 @@ def create_app():
db.init_app(app)
with app.app_context():
- from . import routes
+ from . import routes # pylint: disable=W0611
db.create_all()
return app
diff --git a/argostime/crawler/__init__.py b/argostime/crawler/__init__.py
index 702b5cb..27e9083 100644
--- a/argostime/crawler/__init__.py
+++ b/argostime/crawler/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/__init__.py
@@ -22,6 +21,6 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from argostime.crawler.crawl_utils import CrawlResult, enabled_shops
from argostime.crawler.crawl_url import crawl_url
+from argostime.crawler.crawl_utils import CrawlResult, enabled_shops
from argostime.crawler.shop import *
diff --git a/argostime/crawler/crawl_url.py b/argostime/crawler/crawl_url.py
index 8095e87..78a4247 100644
--- a/argostime/crawler/crawl_url.py
+++ b/argostime/crawler/crawl_url.py
@@ -1,9 +1,8 @@
-#!/usr/bin/env python3
"""
crawler/crawl_url.py
- Crawler function exposed to the rest of the system to get pricing and product
- information from a given URL.
+ Crawler function exposed to the rest of the system to get pricing and
+ product information from a given URL.
Copyright (c) 2022 Martijn
Copyright (c) 2022 Kevin
@@ -27,9 +26,8 @@
import logging
import urllib.parse
-from argostime.exceptions import WebsiteNotImplementedException
-
from argostime.crawler.crawl_utils import CrawlResult, enabled_shops
+from argostime.exceptions import WebsiteNotImplementedException
def crawl_url(url: str) -> CrawlResult:
@@ -47,8 +45,9 @@ def crawl_url(url: str) -> CrawlResult:
if hostname not in enabled_shops:
raise WebsiteNotImplementedException(url)
- # Note: This is a function call! The called function is the corresponding crawler
- # registered using the "@register_crawler" decorator in the "shop" directory.
+ # Note: This is a function call! The called function is the corresponding
+ # crawler registered using the "@register_crawler" decorator in the "shop"
+ # directory.
result: CrawlResult = enabled_shops[hostname]["crawler"](url)
result.check()
diff --git a/argostime/crawler/crawl_utils.py b/argostime/crawler/crawl_utils.py
index bbefc4f..2b4c285 100644
--- a/argostime/crawler/crawl_utils.py
+++ b/argostime/crawler/crawl_utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/crawl_utils.py
@@ -36,7 +35,9 @@
class CrawlResult:
- """Data structure for returning the results of a crawler in a uniform way."""
+ """
+ Data structure for returning the results of a crawler in a uniform way.
+ """
url: Optional[str]
product_name: Optional[str]
@@ -50,15 +51,15 @@ class CrawlResult:
def __init__(
self,
- url: Optional[str]=None,
- product_name: Optional[str]=None,
- product_description: Optional[str]=None,
- product_code: Optional[str]=None,
- normal_price: float=-1.0,
- discount_price: float=-1.0,
- on_sale: bool=False,
- ean: Optional[int]=None,
- ):
+ url: Optional[str] = None,
+ product_name: Optional[str] = None,
+ product_description: Optional[str] = None,
+ product_code: Optional[str] = None,
+ normal_price: float = -1.0,
+ discount_price: float = -1.0,
+ on_sale: bool = False,
+ ean: Optional[int] = None,
+ ):
self.url = url
self.product_name = product_name
self.product_description = product_description
@@ -72,7 +73,8 @@ def __str__(self) -> str:
string = f"CrawlResult(product_name={self.product_name},"\
f"product_description={self.product_description},"\
f"product_code={self.product_code},price={self.normal_price},"\
- f"discount={self.discount_price},sale={self.on_sale},ean={self.ean}"
+ f"discount={self.discount_price},sale={self.on_sale}," \
+ f"ean={self.ean}"
return string
@@ -100,24 +102,28 @@ def check(self) -> None:
if self.discount_price < 0 and self.on_sale:
raise CrawlerException("No discount price given for item on sale!")
if self.normal_price < 0 and not self.on_sale:
- raise CrawlerException("No normal price given for item not on sale!")
+ raise CrawlerException(
+ "No normal price given for item not on sale!")
CrawlerFunc = Callable[[str], CrawlResult]
-ShopDict = TypedDict("ShopDict", {"name": str, "hostname": str, "crawler": CrawlerFunc})
+ShopDict = TypedDict("ShopDict", {"name": str, "hostname": str,
+ "crawler": CrawlerFunc})
enabled_shops: Dict[str, ShopDict] = {}
-def register_crawler(name: str, host: str, use_www: bool = True) -> Callable[[CrawlerFunc], None]:
+def register_crawler(name: str, host: str, use_www: bool = True) \
+ -> Callable[[CrawlerFunc], None]:
"""Decorator to register a new crawler function."""
def decorate(func: Callable[[str], CrawlResult]) -> None:
"""
- This function will be called when you put the "@register_crawler" decorator above
- a function defined in a file in the "shop" directory! The argument will be the
- function above which you put the decorator.
+ This function will be called when you put the "@register_crawler"
+ decorator above a function defined in a file in the "shop" directory!
+ The argument will be the function above which you put the decorator.
"""
- if "argostime" in __config and "disabled_shops" in __config["argostime"]:
+ if "argostime" in __config and \
+ "disabled_shops" in __config["argostime"]:
if host in __config["argostime"]["disabled_shops"]:
logging.debug("Shop %s is disabled", host)
return
@@ -137,7 +143,9 @@ def decorate(func: Callable[[str], CrawlResult]) -> None:
def parse_promotional_message(message: str, price: float) -> float:
- """Parse a given promotional message, and returns the calculated effective price.
+ """
+ Parse a given promotional message and return the calculated effective
+ price.
For example "1+1 GRATIS" will be parsed to meaning a 50% discount.
"2+1 GRATIS" will be parsed to mean a 33% discount, and will return 2/3.
@@ -152,7 +160,8 @@ def parse_promotional_message(message: str, price: float) -> float:
message_no_whitespace = message_no_whitespace.lower()
- logging.debug("Promotion yielded sanitized input %s", message_no_whitespace)
+ logging.debug("Promotion yielded sanitized input %s",
+ message_no_whitespace)
if message_no_whitespace == "1+1gratis":
return 1/2 * price
@@ -187,8 +196,9 @@ def parse_promotional_message(message: str, price: float) -> float:
return float(msg_split[1])
return float(msg_split[1]) / float(msg_split[0])
except ArithmeticError as exception:
- logging.error("Calculation error parsing %s %s", message_no_whitespace, exception)
- except IndexError as exception:
+ logging.error("Calculation error parsing %s %s",
+ message_no_whitespace, exception)
+ except IndexError:
logging.error("IndexError in message %s", message_no_whitespace)
logging.error("Promotion text did not match any known promotion")
diff --git a/argostime/crawler/shop/__init__.py b/argostime/crawler/shop/__init__.py
index f2e6352..fe71a9b 100644
--- a/argostime/crawler/shop/__init__.py
+++ b/argostime/crawler/shop/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/__init__.py
@@ -22,10 +21,11 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from os.path import dirname, basename, isfile, join
import glob
+from os.path import basename, dirname, isfile, join
-# Load all modules in the current directory, based on the answer from Anurag Uniyal:
-# https://stackoverflow.com/questions/1057431/how-to-load-all-modules-in-a-folder
+# Load all modules in the current directory, based on the answer from
+# Anurag Uniyal: https://stackoverflow.com/q/1057431
modules = glob.glob(join(dirname(__file__), "*.py"))
-__all__ = [basename(f)[:-3] for f in modules if isfile(f) and not f.endswith('__init__.py')]
+__all__ = [basename(f)[:-3] for f in modules if isfile(f) and
+ not f.endswith('__init__.py')]
diff --git a/argostime/crawler/shop/ah.py b/argostime/crawler/shop/ah.py
index c27e6ae..39ccf07 100644
--- a/argostime/crawler/shop/ah.py
+++ b/argostime/crawler/shop/ah.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/ah.py
@@ -22,33 +21,35 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from datetime import date
import json
import logging
+from datetime import date
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import \
+ CrawlResult, parse_promotional_message, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, parse_promotional_message, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Albert Heijn", "ah.nl")
def crawl_ah(url: str) -> CrawlResult:
"""Crawler for ah.nl"""
- response: requests.Response = requests.get(url)
+ response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
raw_json_match = soup.find(
"script",
- attrs={ "type": "application/ld+json", "data-react-helmet": "true"}
+ attrs={"type": "application/ld+json", "data-react-helmet": "true"}
)
result: CrawlResult = CrawlResult(url=url)
@@ -56,7 +57,8 @@ def crawl_ah(url: str) -> CrawlResult:
try:
product_dict = json.loads(raw_json_match.text)
except json.decoder.JSONDecodeError as exception:
- logging.error("Could not decode JSON %s, raising CrawlerException", raw_json_match)
+ logging.error("Could not decode JSON %s, raising CrawlerException",
+ raw_json_match)
raise CrawlerException from exception
except Exception as exception:
logging.error(
@@ -86,7 +88,8 @@ def crawl_ah(url: str) -> CrawlResult:
try:
offer = product_dict["offers"]
except KeyError as exception:
- logging.error("Could not find a valid offer in the json %s", product_dict)
+ logging.error("Could not find a valid offer in the json %s",
+ product_dict)
raise CrawlerException from exception
if "validFrom" in offer.keys():
@@ -111,13 +114,15 @@ def crawl_ah(url: str) -> CrawlResult:
# Try to find a promotional message
promo_text_matches = soup.find_all(
"p",
- attrs={ "class" :lambda x: x and x.startswith("promo-sticker-text") }
+ attrs={"class": lambda x:
+ x and x.startswith("promo-sticker-text")}
)
if len(promo_text_matches) == 0:
promo_text_matches = soup.find_all(
"div",
- attrs={ "class" :lambda x: x and x.startswith("promo-sticker_content") }
+ attrs={"class": lambda x:
+ x and x.startswith("promo-sticker_content")}
)
promotion_message: str = ""
@@ -130,8 +135,8 @@ def crawl_ah(url: str) -> CrawlResult:
price: float = float(offer["price"])
- # If there is a mark with for example "25% Korting", this is already calculated into
- # the price we got from the json.
+ # If there is a mark with for example "25% Korting", this is
+ # already calculated into the price we got from the json.
if "korting" not in message_no_whitespace:
promotion = parse_promotional_message(promotion_message, price)
else:
@@ -157,7 +162,8 @@ def crawl_ah(url: str) -> CrawlResult:
try:
result.normal_price = float(product_dict["offers"]["price"])
except KeyError as inner_exception:
- logging.error("Couldn't even find a normal price in %s", product_dict)
+ logging.error("Couldn't even find a normal price in %s",
+ product_dict)
raise CrawlerException from inner_exception
return result
diff --git a/argostime/crawler/shop/brandzaak.py b/argostime/crawler/shop/brandzaak.py
index 050cc24..79f1f5c 100644
--- a/argostime/crawler/shop/brandzaak.py
+++ b/argostime/crawler/shop/brandzaak.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/brandzaak.py
@@ -23,34 +22,35 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-
import logging
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Brandzaak", "brandzaak.nl")
def crawl_brandzaak(url: str) -> CrawlResult:
"""Parse a product from brandzaak.nl"""
- response = requests.get(url)
+ response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
result: CrawlResult = CrawlResult(url=url)
- product_title = soup.find("meta", attrs={ "name": "title"})
- product_price = soup.find("meta", attrs={ "property": "product:price:amount"})
+ product_title = soup.find("meta", attrs={"name": "title"})
+ product_price = soup.find("meta",
+ attrs={"property": "product:price:amount"})
try:
result.product_name = product_title['content']
diff --git a/argostime/crawler/shop/ekoplaza.py b/argostime/crawler/shop/ekoplaza.py
index 3adb746..2f2721b 100644
--- a/argostime/crawler/shop/ekoplaza.py
+++ b/argostime/crawler/shop/ekoplaza.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/ekoplaza.py
@@ -24,12 +23,11 @@
import logging
-import requests
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+import requests
@register_crawler("Ekoplaza", "ekoplaza.nl")
diff --git a/argostime/crawler/shop/etos.py b/argostime/crawler/shop/etos.py
index 0c002a5..3ab405d 100644
--- a/argostime/crawler/shop/etos.py
+++ b/argostime/crawler/shop/etos.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/etos.py
@@ -26,14 +25,14 @@
import logging
from typing import Dict
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from argostime.crawler.crawl_utils import parse_promotional_message
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
-from argostime.crawler.crawl_utils import parse_promotional_message
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Etos", "etos.nl")
@@ -41,7 +40,8 @@ def crawl_etos(url: str) -> CrawlResult:
"""Crawler for etos.nl"""
headers = {
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
+ "image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "nl,en-US;q=0.7,en;q=0.3",
"Cache-Control": "no-cache",
@@ -53,13 +53,15 @@ def crawl_etos(url: str) -> CrawlResult:
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0"
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) "
+ "Gecko/20100101 Firefox/96.0"
}
response = requests.get(url, timeout=10, headers=headers)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
@@ -69,9 +71,7 @@ def crawl_etos(url: str) -> CrawlResult:
try:
raw_product_json = soup.find(
"div",
- attrs= {
- "class": "js-product-detail",
- }
+ attrs={"class": "js-product-detail"}
).get("data-gtm-event")
except AttributeError as exception:
logging.error("Could not find a product detail json")
@@ -80,7 +80,8 @@ def crawl_etos(url: str) -> CrawlResult:
try:
product_dict = json.loads(raw_product_json)
except json.decoder.JSONDecodeError as exception:
- logging.error("Could not decode JSON %s, raising CrawlerException", raw_product_json)
+ logging.error("Could not decode JSON %s, raising CrawlerException",
+ raw_product_json)
raise CrawlerException from exception
logging.debug(product_dict)
@@ -90,7 +91,8 @@ def crawl_etos(url: str) -> CrawlResult:
try:
result.product_name = offer["name"]
except KeyError as exception:
- logging.error("No key name found in json %s parsed as %s", raw_product_json, product_dict)
+ logging.error("No key name found in json %s parsed as %s",
+ raw_product_json, product_dict)
raise CrawlerException from exception
try:
@@ -111,9 +113,10 @@ def crawl_etos(url: str) -> CrawlResult:
result.on_sale = True
else:
# Couldn't parse the promotion!
- logging.info("Couldn't parse promotion %s, assuming no discount", promotion_message)
+ logging.info("Couldn't parse promotion %s, assuming no discount",
+ promotion_message)
result.normal_price = price
- except KeyError as exception:
+ except KeyError:
logging.debug("No promotion found, assuming no discount")
try:
result.normal_price = price
diff --git a/argostime/crawler/shop/hema.py b/argostime/crawler/shop/hema.py
index 858ee1b..d5ede1b 100644
--- a/argostime/crawler/shop/hema.py
+++ b/argostime/crawler/shop/hema.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/hema.py
@@ -28,13 +27,13 @@
import re
from typing import Optional
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("HEMA", "hema.nl")
@@ -44,7 +43,8 @@ def crawl_hema(url: str) -> CrawlResult:
response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
@@ -70,27 +70,34 @@ def crawl_hema(url: str) -> CrawlResult:
try:
product_dict = json.loads(raw_json)
except json.decoder.JSONDecodeError as exception:
- logging.error("Could not decode JSON %s, raising CrawlerException", raw_json)
+ logging.error("Could not decode JSON %s, raising CrawlerException",
+ raw_json)
raise CrawlerException from exception
logging.debug(product_dict)
try:
- result.product_name = product_dict["ecommerce"]["detail"]["products"][0]["name"]
+ result.product_name = \
+ product_dict["ecommerce"]["detail"]["products"][0]["name"]
except KeyError as exception:
- logging.error("Could not find product name in %s via %s", raw_json, url)
+ logging.error("Could not find product name in %s via %s",
+ raw_json, url)
raise CrawlerException from exception
try:
- result.product_code = product_dict["ecommerce"]["detail"]["products"][0]["id"]
+ result.product_code = \
+ product_dict["ecommerce"]["detail"]["products"][0]["id"]
except KeyError as exception:
- logging.error("Could not find product code in %s via %s", raw_json, url)
+ logging.error("Could not find product code in %s via %s",
+ raw_json, url)
raise CrawlerException from exception
try:
- result.normal_price = float(product_dict["ecommerce"]["detail"]["products"][0]["price"])
- except KeyError as exception:
- logging.error("Could not find a valid price in %s via %s", raw_json, url)
+ result.normal_price = \
+ float(product_dict["ecommerce"]["detail"]["products"][0]["price"])
+ except KeyError:
+ logging.error("Could not find a valid price in %s via %s",
+ raw_json, url)
result.normal_price = -1
return result
diff --git a/argostime/crawler/shop/ikea.py b/argostime/crawler/shop/ikea.py
index 88cf03b..72342e4 100644
--- a/argostime/crawler/shop/ikea.py
+++ b/argostime/crawler/shop/ikea.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/ikea.py
@@ -25,13 +24,13 @@
import logging
import re
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("IKEA", "ikea.com")
@@ -43,14 +42,15 @@ def crawl_ikea(url: str) -> CrawlResult: # pylint: disable=R0915
response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
info_wrapper = soup.find(
"div",
- id= re.compile("buy-module-content")
+ id=re.compile("buy-module-content")
)
try:
@@ -61,35 +61,36 @@ def crawl_ikea(url: str) -> CrawlResult: # pylint: disable=R0915
try:
result.product_name = info_wrapper.find(
["span", "div"],
- class_= re.compile("header-section__title--big")
+ class_=re.compile("header-section__title--big")
).text
except Exception as exception:
- logging.error("Could not find a name in %s %s", info_wrapper, exception)
+ logging.error("Could not find a name in %s %s",
+ info_wrapper, exception)
raise CrawlerException from exception
try:
result.product_description = info_wrapper.find(
"span",
- class_= re.compile("header-section__description-text")
+ class_=re.compile("header-section__description-text")
).text
- except Exception as exception:
+ except Exception:
logging.error("Could not find a description in %s", info_wrapper)
-
try:
result.product_code = soup.find(
"span",
- class_= re.compile("product-identifier__value")
+ class_=re.compile("product-identifier__value")
).text
except Exception as exception:
- logging.error("Could not find a product code in %s %s", info_wrapper, exception)
+ logging.error("Could not find a product code in %s %s",
+ info_wrapper, exception)
raise CrawlerException from exception
try:
# Todo: Verify if this is needed with discounted product page...
price_tag_prev = info_wrapper.find(
"div",
- class_= re.compile("price-package__previous-price-hasStrikeThrough")
+ class_=re.compile("price-package__previous-price-hasStrikeThrough")
)
if not price_tag_prev:
@@ -110,7 +111,7 @@ def crawl_ikea(url: str) -> CrawlResult: # pylint: disable=R0915
decimals = float(
price_tag_prev.find(
"span",
- class_= re.compile("price__decimal")
+ class_=re.compile("price__decimal")
).text)
except Exception as exception:
logging.debug("No decimals found, assuming 0 %s", exception)
@@ -123,7 +124,7 @@ def crawl_ikea(url: str) -> CrawlResult: # pylint: disable=R0915
try:
price_tag_curr = info_wrapper.find(
# "div",
- class_= re.compile("price-module__current-price")
+ class_=re.compile("price-module__current-price")
)
integers = float(
@@ -131,14 +132,14 @@ def crawl_ikea(url: str) -> CrawlResult: # pylint: disable=R0915
".-", "",
price_tag_curr.find(
"span",
- class_= re.compile("price__integer")
+ class_=re.compile("price__integer")
).text))
try:
decimals = float(
price_tag_curr.find(
"span",
- class_= re.compile("price__decimal")
+ class_=re.compile("price__decimal")
).text)
except Exception as exception:
logging.debug("No decimals found, assuming 0 %s", exception)
@@ -150,7 +151,8 @@ def crawl_ikea(url: str) -> CrawlResult: # pylint: disable=R0915
else:
result.normal_price = integers + decimals
except Exception as exception:
- logging.error("No current price found in %s %s", info_wrapper, exception)
+ logging.error("No current price found in %s %s",
+ info_wrapper, exception)
raise CrawlerException from exception
return result
diff --git a/argostime/crawler/shop/intergamma.py b/argostime/crawler/shop/intergamma.py
index 431bc13..50a81bc 100644
--- a/argostime/crawler/shop/intergamma.py
+++ b/argostime/crawler/shop/intergamma.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/intergamma.py
@@ -24,13 +23,13 @@
import logging
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
def crawl_intergamma(url: str) -> CrawlResult:
@@ -38,7 +37,8 @@ def crawl_intergamma(url: str) -> CrawlResult:
response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %s while getting url %s", response.status_code, url)
+ logging.error("Got status code %s while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
# Use UTF-8 encoding instead of ISO-8859-1
@@ -74,7 +74,8 @@ def crawl_intergamma(url: str) -> CrawlResult:
itemtype="http://schema.org/Product"
)["data-product-code"]
except Exception as exception:
- logging.error("Could not find a product code, raising CrawlerException")
+ logging.error("Could not find a product code, "
+ "raising CrawlerException")
logging.debug("Got exception: %s", exception)
raise CrawlerException from exception
diff --git a/argostime/crawler/shop/jumbo.py b/argostime/crawler/shop/jumbo.py
index 2cf797a..c500f30 100644
--- a/argostime/crawler/shop/jumbo.py
+++ b/argostime/crawler/shop/jumbo.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/jumbo.py
@@ -25,13 +24,13 @@
import json
import logging
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Jumbo", "jumbo.com")
@@ -42,7 +41,8 @@ def crawl_jumbo(url: str) -> CrawlResult:
"""
headers = {
"Referer": "https://www.jumbo.com",
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,"
+ "image/avif,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "nl,en-US;q=0.7,en;q=0.3",
"Cache-Control": "no-cache",
@@ -54,17 +54,20 @@ def crawl_jumbo(url: str) -> CrawlResult:
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Upgrade-Insecure-Requests": "1",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) Gecko/20100101 Firefox/96.0"
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:96.0) "
+ "Gecko/20100101 Firefox/96.0"
}
response = requests.get(url, timeout=10, headers=headers)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
- product_json = soup.find("script", attrs={"type": "application/ld+json", "data-n-head": "ssr"})
+ product_json = soup.find("script", attrs={"type": "application/ld+json",
+ "data-n-head": "ssr"})
raw_json = product_json.string
result: CrawlResult = CrawlResult(url=url)
@@ -72,13 +75,15 @@ def crawl_jumbo(url: str) -> CrawlResult:
try:
product = json.loads(raw_json)
except json.decoder.JSONDecodeError as exception:
- logging.error("Could not decode JSON %s, raising CrawlerException", raw_json)
+ logging.error("Could not decode JSON %s, raising CrawlerException",
+ raw_json)
raise CrawlerException from exception
if product["offers"]["@type"] == "AggregateOffer":
offer = product["offers"]
else:
- logging.error("No price info available in %s, raising CrawlerException", raw_json)
+ logging.error("No price info available in %s, "
+ "raising CrawlerException", raw_json)
raise CrawlerException()
try:
diff --git a/argostime/crawler/shop/pipashop.py b/argostime/crawler/shop/pipashop.py
index ebb0b9e..2f881a1 100644
--- a/argostime/crawler/shop/pipashop.py
+++ b/argostime/crawler/shop/pipashop.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/pipashop.py
@@ -25,13 +24,13 @@
import logging
import re
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Pipa Shop", "pipa-shop.nl")
@@ -46,7 +45,8 @@ def crawl_pipashop(url: str) -> CrawlResult:
soup = BeautifulSoup(request.text, "html.parser")
try:
- price = re.sub(r"[^0-9.]", "", soup.select_one("div.product-price").text)
+ price = re.sub(r"[^0-9.]", "",
+ soup.select_one("div.product-price").text)
result.product_name = soup.select_one("div.product-title a").text
result.product_code = url.split("/product/").pop().split("/")[0]
result.normal_price = float(price)
diff --git a/argostime/crawler/shop/praxis.py b/argostime/crawler/shop/praxis.py
index 162cd4a..a056b17 100644
--- a/argostime/crawler/shop/praxis.py
+++ b/argostime/crawler/shop/praxis.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/praxis.py
@@ -26,13 +25,13 @@
import logging
import re
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
def __fix_bad_json(bad_json: str) -> str:
@@ -45,7 +44,8 @@ def crawl_praxis(url: str) -> CrawlResult:
response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %s while getting url %s", response.status_code, url)
+ logging.error("Got status code %s while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup: BeautifulSoup = BeautifulSoup(response.text, "html.parser")
@@ -54,17 +54,20 @@ def crawl_praxis(url: str) -> CrawlResult:
try:
raw_product_json = soup.find(
"script",
- text=lambda value: value and value.startswith("window.__PRELOADED_STATE_productDetailsFragmentInfo__")
+ text=lambda value: value and value.startswith(
+ "window.__PRELOADED_STATE_productDetailsFragmentInfo__")
).text.split("=", maxsplit=1)[1].strip()
except Exception as exception:
- logging.error("Could not find a product detail JSON, raising CrawlerException")
+ logging.error("Could not find a product detail JSON, "
+ "raising CrawlerException")
raise CrawlerException from exception
try:
json_data = json.loads(__fix_bad_json(raw_product_json))
product = json_data["productDetails"]
except json.decoder.JSONDecodeError as exception:
- logging.error("Could not decode JSON %s, raising CrawlerException", raw_product_json)
+ logging.error("Could not decode JSON %s, raising CrawlerException",
+ raw_product_json)
raise CrawlerException from exception
except KeyError as exception:
logging.error("No key productDetails found in JSON data")
@@ -90,13 +93,14 @@ def crawl_praxis(url: str) -> CrawlResult:
try:
result.ean = int(product["ean"])
- except KeyError as exception:
+ except KeyError:
# Don't raise an exception since EAN is not strictly necessary!
logging.error("No key ean found in JSON")
try:
if "discount" in product.keys() and \
- ("discountClass" not in product.keys() or product["discountClass"] != "excludedproducts"):
+ ("discountClass" not in product.keys() or
+ product["discountClass"] != "excludedproducts"):
result.discount_price = float(product["discount"]["value"])
result.on_sale = True
else:
diff --git a/argostime/crawler/shop/simonlevelt.py b/argostime/crawler/shop/simonlevelt.py
index 2bc0dbd..510ed56 100644
--- a/argostime/crawler/shop/simonlevelt.py
+++ b/argostime/crawler/shop/simonlevelt.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/simonlevelt.py
@@ -25,13 +24,13 @@
import locale
import logging
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Simon Lévelt", "simonlevelt.nl")
@@ -41,7 +40,8 @@ def crawl_simonlevelt(url: str) -> CrawlResult:
response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.debug("Got status code %d while getting url %s", response.status_code, url)
+ logging.debug("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
@@ -49,9 +49,11 @@ def crawl_simonlevelt(url: str) -> CrawlResult:
result = CrawlResult()
try:
- result.url = soup.find("meta", property="product:product_link").get("content")
+ result.url = soup.find("meta",
+ property="product:product_link").get("content")
except Exception as exception:
- logging.info("Couldn't find url in soup, using given instead %s", exception)
+ logging.info("Couldn't find url in soup, using given instead %s",
+ exception)
result.url = url
try:
diff --git a/argostime/crawler/shop/steam.py b/argostime/crawler/shop/steam.py
index ea7577e..6671a90 100644
--- a/argostime/crawler/shop/steam.py
+++ b/argostime/crawler/shop/steam.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
crawler/shop/steam.py
@@ -24,13 +23,13 @@
import logging
-import requests
-from bs4 import BeautifulSoup
-
+from argostime.crawler.crawl_utils import CrawlResult, register_crawler
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
-from argostime.crawler.crawl_utils import CrawlResult, register_crawler
+from bs4 import BeautifulSoup
+
+import requests
@register_crawler("Steam", "store.steampowered.com", False)
@@ -42,7 +41,8 @@ def crawl_steam(url: str) -> CrawlResult:
response: requests.Response = requests.get(url, timeout=10)
if response.status_code != 200:
- logging.error("Got status code %d while getting url %s", response.status_code, url)
+ logging.error("Got status code %d while getting url %s",
+ response.status_code, url)
raise PageNotFoundException(url)
soup = BeautifulSoup(response.text, "html.parser")
@@ -85,7 +85,8 @@ def crawl_steam(url: str) -> CrawlResult:
}
).get("value")
except Exception as exception:
- logging.error("Could not find a product code in %s %s", game_info, exception)
+ logging.error("Could not find a product code in %s %s",
+ game_info, exception)
raise CrawlerException from exception
try:
@@ -96,10 +97,11 @@ def crawl_steam(url: str) -> CrawlResult:
"game_purchase_discount"
).get("data-price-final")) / 100.0
result.on_sale = True
- # There is info in the page about the normal price when there is a discount,
- # it's just more of a hassle to find that information
+ # There is info in the page about the normal price when there is a
+ # discount, it's just more of a hassle to find that information
except Exception as exception:
- logging.info("No discount found, looking for normal price %s", exception)
+ logging.info("No discount found, looking for normal price %s",
+ exception)
try:
result.normal_price = float(
game_info.find(
@@ -107,7 +109,8 @@ def crawl_steam(url: str) -> CrawlResult:
"game_purchase_price"
).get("data-price-final")) / 100.0
except Exception as inner_exception:
- logging.error("No normal price found in %s %s", game_info, inner_exception)
+ logging.error("No normal price found in %s %s",
+ game_info, inner_exception)
raise CrawlerException from inner_exception
return result
diff --git a/argostime/exceptions.py b/argostime/exceptions.py
index 3eeeeeb..25c405b 100644
--- a/argostime/exceptions.py
+++ b/argostime/exceptions.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
exceptions.py
@@ -22,6 +21,7 @@
import logging
+
class PageNotFoundException(Exception):
"""Exception to throw when a request gets a 404 returned."""
def __init__(self, url: str):
@@ -31,6 +31,7 @@ def __init__(self, url: str):
super().__init__()
+
class WebsiteNotImplementedException(Exception):
"""Exception to throw if a certain website has no implemented scraper."""
@@ -41,8 +42,10 @@ def __init__(self, url: str):
super().__init__()
+
class NoEffectivePriceAvailableException(Exception):
"""Exception to throw if a Price object has no valid price."""
+
class CrawlerException(Exception):
"""Exception to throw if something goes wrong in the crawler."""
diff --git a/argostime/graphs.py b/argostime/graphs.py
index 93da577..aeeed25 100644
--- a/argostime/graphs.py
+++ b/argostime/graphs.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
graphs.py
@@ -23,12 +22,13 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from datetime import datetime, timedelta
import json
+from datetime import datetime, timedelta
from argostime import db
from argostime.exceptions import NoEffectivePriceAvailableException
-from argostime.models import ProductOffer, Price
+from argostime.models import Price, ProductOffer
+
def generate_price_graph_data(offer: ProductOffer) -> str:
"""
@@ -51,7 +51,8 @@ def generate_price_graph_data(offer: ProductOffer) -> str:
for price in prices:
try:
effective_prices.append(price.get_effective_price())
- dates.append(price.datetime.replace(hour=12, minute=0, second=0, microsecond=0))
+ dates.append(price.datetime.replace(
+ hour=12, minute=0, second=0, microsecond=0))
if price.on_sale:
if len(sales_index) == 0 or sales_index[-1][1] != (index - 1):
@@ -90,7 +91,8 @@ def generate_price_graph_data(offer: ProductOffer) -> str:
data = {
"title": {
- "text": f"Prijsontwikkeling van {offer.product.name} bij {offer.webshop.name}",
+ "text": f"Prijsontwikkeling van {offer.product.name} "
+ f"bij {offer.webshop.name}",
"left": "center",
"textStyle": {
"color": "#000",
diff --git a/argostime/models.py b/argostime/models.py
index 8f195a9..b012db4 100644
--- a/argostime/models.py
+++ b/argostime/models.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
models.py
@@ -22,18 +21,19 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from datetime import datetime
import logging
import statistics
+from datetime import datetime
from sys import maxsize
from typing import List
-from argostime.crawler import crawl_url, CrawlResult
-from argostime.exceptions import CrawlerException, WebsiteNotImplementedException
-from argostime.exceptions import PageNotFoundException
+from argostime import db
+from argostime.crawler import CrawlResult, crawl_url
+from argostime.exceptions import \
+ CrawlerException, WebsiteNotImplementedException
from argostime.exceptions import NoEffectivePriceAvailableException
+from argostime.exceptions import PageNotFoundException
-from argostime import db
class Webshop(db.Model): # type: ignore
"""A webshop, which may offer products."""
@@ -41,12 +41,12 @@ class Webshop(db.Model): # type: ignore
id = db.Column(db.Integer, primary_key=True)
name = db.Column(db.Unicode(512), unique=True, nullable=False)
hostname = db.Column(db.Unicode(512), unique=True, nullable=False)
- products = db.relationship("ProductOffer",
- backref="webshop",
- lazy=True, cascade="all, delete", passive_deletes=True)
+ products = db.relationship("ProductOffer", backref="webshop", lazy=True,
+ cascade="all, delete", passive_deletes=True)
def __str__(self) -> str:
- return f"Webshop(id={self.id}, name={self.name}, hostname={self.hostname})"
+ return f"Webshop(id={self.id}, name={self.name}, " \
+ f"hostname={self.hostname})"
class Product(db.Model): # type: ignore
@@ -57,13 +57,15 @@ class Product(db.Model): # type: ignore
description = db.Column(db.Unicode(1024))
ean = db.Column(db.Integer)
product_code = db.Column(db.Unicode(512), unique=True)
- product_offers = db.relationship("ProductOffer",
- backref="product", lazy=True,
- cascade="all, delete", passive_deletes=True)
+ product_offers = db.relationship("ProductOffer", backref="product",
+ lazy=True, cascade="all, delete",
+ passive_deletes=True)
def __str__(self) -> str:
- return (f"Product(id={self.id}, name={self.name}, description={self.description},"
- f"ean={self.ean}, product_code={self.product_code}, product_offers={self.product_offers})")
+ return (f"Product(id={self.id}, name={self.name}, "
+ f"description={self.description}, ean={self.ean}, "
+ f"product_code={self.product_code}, "
+ f"product_offers={self.product_offers})")
class Price(db.Model): # type: ignore
@@ -74,14 +76,17 @@ class Price(db.Model): # type: ignore
discount_price = db.Column(db.Float)
on_sale = db.Column(db.Boolean)
datetime = db.Column(db.DateTime)
- product_offer_id = db.Column(db.Integer,
- db.ForeignKey("ProductOffer.id", ondelete="CASCADE"),
- nullable=False)
+ product_offer_id = db.Column(
+ db.Integer,
+ db.ForeignKey("ProductOffer.id", ondelete="CASCADE"),
+ nullable=False
+ )
def __str__(self) -> str:
- return (f"Price(id={self.id}, normal_price={self.normal_price},"
- f"discount_price={self.discount_price}, on_sale={self.on_sale}"
- f"datetime={self.datetime}, product_offer_id={self.product_offer_id})")
+ return (f"Price(id={self.id}, normal_price={self.normal_price}, "
+ f"discount_price={self.discount_price}, "
+ f"on_sale={self.on_sale}, datetime={self.datetime}, "
+ f"product_offer_id={self.product_offer_id})")
def get_effective_price(self) -> float:
"""Return the discounted price if on sale, else the normal price."""
@@ -99,9 +104,11 @@ class ProductOffer(db.Model): # type: ignore
__tablename__ = "ProductOffer"
id = db.Column(db.Integer, primary_key=True)
product_id = db.Column(db.Integer,
- db.ForeignKey("Product.id", ondelete="CASCADE"), nullable=False)
+ db.ForeignKey("Product.id", ondelete="CASCADE"),
+ nullable=False)
shop_id = db.Column(db.Integer,
- db.ForeignKey("Webshop.id", ondelete="CASCADE"), nullable=False)
+ db.ForeignKey("Webshop.id", ondelete="CASCADE"),
+ nullable=False)
url = db.Column(db.Unicode(1024), unique=True, nullable=False)
time_added = db.Column(db.DateTime)
average_price = db.Column(db.Float)
@@ -110,11 +117,12 @@ class ProductOffer(db.Model): # type: ignore
# TODO: Memoize current price with reference to the most recent Price entry
prices = db.relationship("Price", backref="product_offer", lazy=True,
- cascade="all, delete", passive_deletes=True)
+ cascade="all, delete", passive_deletes=True)
def __str__(self):
- return (f"ProductOffer(id={self.id}, product_id={self.product_id},"
- f"shop_id={self.shop_id}, url={self.url}, time_added={self.time_added})")
+ return (f"ProductOffer(id={self.id}, product_id={self.product_id}, "
+ f"shop_id={self.shop_id}, url={self.url}, "
+ f"time_added={self.time_added})")
def get_current_price(self) -> Price:
"""Get the latest Price object related to this offer."""
@@ -129,7 +137,10 @@ def get_current_price(self) -> Price:
return price
def update_average_price(self) -> float:
- """Calculate the average price of this offer and update ProductOffer.average_price."""
+ """
+ Calculate the average price of this offer and update
+ ProductOffer.average_price.
+ """
logging.debug("Updating average price for %s", self)
effective_price_values: List[float] = []
@@ -142,7 +153,7 @@ def update_average_price(self) -> float:
try:
effective_price_values.append(price.get_effective_price())
except NoEffectivePriceAvailableException:
- # Ignore price entries without a valid price in calculating the price.
+ # Ignore price entries without a valid price.
pass
try:
avg: float = statistics.mean(effective_price_values)
@@ -150,7 +161,8 @@ def update_average_price(self) -> float:
db.session.commit()
return avg
except statistics.StatisticsError:
- logging.debug("Called get_average_price for %s but no prices were found...", str(self))
+ logging.debug("Called get_average_price for %s but no prices were "
+ "found...", str(self))
return -1
def get_average_price(self) -> float:
@@ -175,8 +187,11 @@ def get_prices_since(self, since_time: datetime) -> list[Price]:
return prices_since_list
def get_lowest_price_since(self, since_time: datetime) -> float:
- """Return the lowest effective price of this offer since a specific time."""
- logging.debug("Calculating lowest price since %s for %s", since_time, self)
+ """
+ Return the lowest effective price of this offer since a specific time.
+ """
+ logging.debug("Calculating lowest price since %s for %s",
+ since_time, self)
min_price: float = maxsize
price: Price
@@ -207,8 +222,11 @@ def get_lowest_price(self) -> float:
return self.minimum_price
def get_highest_price_since(self, since_time: datetime) -> float:
- """Return the highest effective price of this offer since a specific time."""
- logging.debug("Calculating highest price since %s for %s", since_time, self)
+ """
+ Return the highest effective price of this offer since a specific time.
+ """
+ logging.debug("Calculating highest price since %s for %s",
+ since_time, self)
max_price: float = -1
price: Price
@@ -237,8 +255,12 @@ def get_highest_price(self) -> float:
"""
return self.maximum_price
- def get_price_standard_deviation_since(self, since_time: datetime) -> float:
- """Return the standard deviation of the effective price of this offer since a given date."""
+ def get_price_standard_deviation_since(self, since_time: datetime) \
+ -> float:
+ """
+ Return the standard deviation of the effective price of this offer
+ since a given date.
+ """
effective_prices: List[float] = []
price: Price
@@ -256,7 +278,9 @@ def get_price_standard_deviation_since(self, since_time: datetime) -> float:
return 0.0
def get_price_standard_deviation(self) -> float:
- """Return the standard deviation of the effective price of this offer."""
+ """
+ Return the standard deviation of the effective price of this offer.
+ """
return self.get_price_standard_deviation_since(self.time_added)
def update_memoized_values(self) -> None:
diff --git a/argostime/products.py b/argostime/products.py
index 20a341c..cf890e6 100644
--- a/argostime/products.py
+++ b/argostime/products.py
@@ -1,9 +1,8 @@
-#!/usr/bin/env python3
"""
products.py
- Abstraction layer between the crawler & database on one hand, and the actual web interface
- on the other.
+ Abstraction layer between the crawler & database on one hand, and the
+ actual web interface on the other.
Copyright (c) 2022 Martijn
@@ -23,15 +22,16 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from enum import Enum
+import urllib.parse
from datetime import datetime
+from enum import Enum
from typing import Tuple
-import urllib.parse
from argostime import db
+from argostime.crawler import CrawlResult, crawl_url, enabled_shops
from argostime.exceptions import WebsiteNotImplementedException
-from argostime.models import Webshop, Price, Product, ProductOffer
-from argostime.crawler import crawl_url, CrawlResult, enabled_shops
+from argostime.models import Price, Product, ProductOffer, Webshop
+
class ProductOfferAddResult(Enum):
"""Enum to indicate the result of add_product_offer"""
@@ -40,8 +40,12 @@ class ProductOfferAddResult(Enum):
ALREADY_EXISTS = 2
FAILED_404_NOT_FOUND = 3
-def add_product_offer_from_url(url: str) -> Tuple[ProductOfferAddResult, ProductOffer]:
- """Try to add a product offer to the database, add product and webshop if required.
+
+def add_product_offer_from_url(url: str) -> \
+ Tuple[ProductOfferAddResult, ProductOffer]:
+ """
+ Try to add a product offer to the database, add product and webshop if
+ required.
Returns a ProductOfferAddResult enum
"""
diff --git a/argostime/routes.py b/argostime/routes.py
index 3c1aa44..55f8555 100644
--- a/argostime/routes.py
+++ b/argostime/routes.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
routes.py
@@ -22,22 +21,24 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-from datetime import datetime
import logging
-from typing import List, Dict
import urllib.parse
-
-from flask import current_app as app
-from flask import render_template, abort, request, redirect
-from flask import Response
+from datetime import datetime
+from typing import Dict, List
from argostime import db
from argostime.exceptions import CrawlerException
from argostime.exceptions import PageNotFoundException
from argostime.exceptions import WebsiteNotImplementedException
from argostime.graphs import generate_price_graph_data
-from argostime.models import Webshop, Product, ProductOffer, Price
-from argostime.products import ProductOfferAddResult, add_product_offer_from_url
+from argostime.models import Price, Product, ProductOffer, Webshop
+from argostime.products import \
+ ProductOfferAddResult, add_product_offer_from_url
+
+from flask import Response
+from flask import abort, redirect, render_template, request
+from flask import current_app as app
+
def add_product_url(url):
"""Helper function for adding a product"""
@@ -47,30 +48,38 @@ def add_product_url(url):
hostname: str = urllib.parse.urlparse(url).netloc
if len(hostname) == 0:
hostname = url
- return render_template("add_product_result.html.jinja",
- result=f"Helaas wordt de website {hostname} nog niet ondersteund."), 400
+ return render_template(
+ "add_product_result.html.jinja",
+ result=f"Helaas wordt de website {hostname} nog niet ondersteund."
+ ), 400
except PageNotFoundException:
- return render_template("add_product_result.html.jinja",
- result=f"De pagina {url} kon niet worden gevonden."), 404
+ return render_template(
+ "add_product_result.html.jinja",
+ result=f"De pagina {url} kon niet worden gevonden."
+ ), 404
except CrawlerException as exception:
logging.info(
"Failed to add product from url %s, got CrawlerException %s",
url,
exception)
- return render_template("add_product_result.html.jinja",
- result=f"Het is niet gelukt om een product te vinden op de gegeven URL {url}."
- " Verwijst de link wel naar een productpagina?")
+ return render_template(
+ "add_product_result.html.jinja",
+ result=f"Het is niet gelukt om een product te vinden op de "
+ f"gegeven URL {url}. Verwijst de link wel naar een "
+ f"productpagina?"
+ )
if (
res == ProductOfferAddResult.ADDED
or
res == ProductOfferAddResult.ALREADY_EXISTS and offer is not None
- ):
+ ):
return redirect(f"/product/{offer.product.product_code}")
return render_template("add_product.html.jinja", result=str(res))
+
@app.route("/", methods=["GET", "POST"])
def index():
"""Render home page"""
@@ -85,7 +94,7 @@ def index():
discounts = db.session.scalars(
db.select(Price).where(
Price.datetime >= datetime.now().date(),
- Price.on_sale == True # pylint: disable=C0121
+ Price.on_sale.is_(True)  # not "is True": that yields Python False
)
).all()
@@ -101,6 +110,7 @@ def index():
discounts=discounts,
shops=shops)
+
@app.route("/product/")
def product_page(product_code):
"""Show the page for a specific product, with all known product offers"""
@@ -110,7 +120,8 @@ def product_page(product_code):
.where(Product.product_code == product_code)
).first()
- logging.debug("Rendering product page for %s based on product code %s", product, product_code)
+ logging.debug("Rendering product page for %s based on product code %s",
+ product, product_code)
if product is None:
abort(404)
@@ -126,6 +137,7 @@ def product_page(product_code):
p=product,
offers=offers)
+
@app.route("/productoffer//price_step_graph_data.json")
def offer_price_json(offer_id):
"""Generate the price step graph data of a specific offer"""
@@ -140,6 +152,7 @@ def offer_price_json(offer_id):
data: str = generate_price_graph_data(offer)
return Response(data, mimetype="application/json")
+
@app.route("/all_offers")
def all_offers():
"""Generate an overview of all available offers"""
@@ -165,6 +178,7 @@ def all_offers():
show_variance=show_variance
)
+
@app.route("/shop/")
def webshop_page(shop_id):
"""Show a page with all the product offers of a specific webshop"""
@@ -199,6 +213,7 @@ def webshop_page(shop_id):
show_variance=show_variance
)
+
@app.route("/add_url", methods=['GET'])
def add_url():
"""GET request to allow users to add a URL using a booklet"""
@@ -208,6 +223,7 @@ def add_url():
abort(404)
return add_product_url(url)
+
@app.errorhandler(404)
def not_found(error):
"""Return the 404 page"""
diff --git a/argostime/static/stylesheet.css b/argostime/static/stylesheet.css
index 05cebb2..712cc62 100644
--- a/argostime/static/stylesheet.css
+++ b/argostime/static/stylesheet.css
@@ -1,5 +1,5 @@
-body {
+body {
font: 1.2em/1.62 sans-serif;
margin: auto;
padding: 20px;
@@ -31,7 +31,7 @@ b {
}
a {
- color: #3273dc;
+ color: #3273dc;
}
.sale {
diff --git a/argostime/static/table-sort.js b/argostime/static/table-sort.js
index a911d10..e547302 100644
--- a/argostime/static/table-sort.js
+++ b/argostime/static/table-sort.js
@@ -1,15 +1,15 @@
-/*
+/*
table-sort-js 1.6.8
Author: Lee Wannacott
-Licence: MIT License Copyright (c) 2021 Lee Wannacott
-
+Licence: MIT License Copyright (c) 2021 Lee Wannacott
+
GitHub Repository: https://github.com/LeeWannacott/table-sort-js
npm package: https://www.npmjs.com/package/table-sort-js
Demo: https://leewannacott.github.io/Portfolio/#/GitHub
Install:
Frontend: or
-Download this file and add to your HTML
-Backend: npm install table-sort-js and use require("../node_modules/table-sort-js/table-sort.js")
+Download this file and add to your HTML
+Backend: npm install table-sort-js and use require("../node_modules/table-sort-js/table-sort.js")
Instructions:
Add class="table-sort" to tables you'd like to make sortable
Click on the table headers to sort them.
diff --git a/argostime_update_prices.py b/argostime_update_prices.py
old mode 100644
new mode 100755
index d1f2aeb..006e470
--- a/argostime_update_prices.py
+++ b/argostime_update_prices.py
@@ -22,12 +22,12 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-import random
import logging
+import random
import time
-from argostime.models import ProductOffer
from argostime import create_app, db
+from argostime.models import ProductOffer
app = create_app()
app.app_context().push()
@@ -47,7 +47,8 @@
try:
offer.crawl_new_price()
except Exception as exception:
- logging.error("Received %s while updating price of %s, continuing...", exception, offer)
+ logging.error("Received %s while updating price of %s, continuing...",
+ exception, offer)
next_sleep_time: float = random.uniform(1, 180)
logging.debug("Sleeping for %f seconds", next_sleep_time)
diff --git a/argostime_update_prices_parallel.py b/argostime_update_prices_parallel.py
index 1bbedd1..795332c 100755
--- a/argostime_update_prices_parallel.py
+++ b/argostime_update_prices_parallel.py
@@ -22,17 +22,18 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-import random
import logging
-from multiprocessing import Process
+import random
import time
+from multiprocessing import Process
-from argostime.models import ProductOffer, Webshop
from argostime import create_app, db
+from argostime.models import ProductOffer, Webshop
app = create_app()
app.app_context().push()
+
def update_shop_offers(shop_id: int) -> None:
"""Crawl all the offers of one shop"""
@@ -48,12 +49,14 @@ def update_shop_offers(shop_id: int) -> None:
try:
offer.crawl_new_price()
except Exception as exception:
- logging.error("Received %s while updating price of %s, continuing...", exception, offer)
+ logging.error("Received %s while updating price of %s, "
+ "continuing...", exception, offer)
next_sleep_time: float = random.uniform(1, 180)
logging.debug("Sleeping for %f seconds", next_sleep_time)
time.sleep(next_sleep_time)
+
if __name__ == "__main__":
shops: list[Webshop] = db.session.scalars(
diff --git a/check_url.py b/check_url.py
index 4816e18..8a06ca4 100755
--- a/check_url.py
+++ b/check_url.py
@@ -26,7 +26,6 @@
import traceback
from argostime.crawler.crawl_url import crawl_url
-
from argostime.crawler.crawl_utils import CrawlResult
# Print help message if needed...
@@ -37,7 +36,7 @@
# Just call the crawler with the url given by the user
try:
result: CrawlResult = crawl_url(sys.argv[1])
-except Exception as exception:
+except Exception:
print("Exception thrown during crawling:", file=sys.stderr)
traceback.print_exc()
exit()
diff --git a/create_indexes.py b/create_indexes.py
index 4ef5794..a8ed09c 100755
--- a/create_indexes.py
+++ b/create_indexes.py
@@ -23,11 +23,10 @@
"""
import logging
-from sqlalchemy import text
-from sqlalchemy.exc import OperationalError
-
from argostime import create_app, db
-from argostime.models import ProductOffer, Product, Price, Webshop
+from argostime.models import Price, Product, ProductOffer, Webshop
+
+from sqlalchemy.exc import OperationalError
app = create_app()
app.app_context().push()
@@ -37,12 +36,12 @@
indexes = [
db.Index("idx_Price_datetime", Price.datetime),
db.Index("idx_Price_product_offer", Price.product_offer_id),
- db.Index("idx_Price_product_offer_id_datetime", Price.product_offer_id, Price.datetime),
+ db.Index("idx_Price_product_offer_id_datetime",
+ Price.product_offer_id, Price.datetime),
db.Index("idx_ProductOffer_shop_id", ProductOffer.shop_id),
db.Index("idx_ProductOffer_product_id", ProductOffer.product_id),
db.Index("idx_Webshop_hostname", Webshop.hostname),
db.Index("idx_Product_product_code", Product.product_code),
-
]
for index in indexes:
diff --git a/manual_update.py b/manual_update.py
index 8498f7a..003dabc 100755
--- a/manual_update.py
+++ b/manual_update.py
@@ -2,7 +2,8 @@
"""
manual_update.py
- Standalone script to manually update the price of a product offer by product_offer_id.
+ Standalone script to manually update the price of a product offer by
+ product_offer_id.
Copyright (c) 2022 Kevin
@@ -22,12 +23,13 @@
along with Argostimè. If not, see <https://www.gnu.org/licenses/>.
"""
-import sys
import logging
+import sys
from argostime import create_app, db
from argostime.models import ProductOffer
+
app = create_app()
app.app_context().push()
@@ -37,7 +39,10 @@
print("No number given")
sys.exit(-1)
-offer: ProductOffer = db.session.execute(db.select(ProductOffer).where(ProductOffer.id == product_offer_id)).scalar_one()
+offer: ProductOffer = \
+ db.session.execute(
+ db.select(ProductOffer).where(ProductOffer.id == product_offer_id)
+ ).scalar_one()
logging.debug("Found offer %s", product_offer_id)
logging.debug("Manually updating ProductOffer %s", offer)
@@ -45,4 +50,5 @@
try:
offer.crawl_new_price()
except Exception as exception:
- logging.error("Received %s while updating price of %s, continuing...", exception, offer)
+ logging.error("Received %s while updating price of %s, continuing...",
+ exception, offer)
diff --git a/migration_add_productoffer_avg_price_column.py b/migration_add_productoffer_avg_price_column.py
index 4a22ade..2a164f8 100755
--- a/migration_add_productoffer_avg_price_column.py
+++ b/migration_add_productoffer_avg_price_column.py
@@ -24,11 +24,12 @@
import logging
+from argostime import create_app, db
+from argostime.models import Product, ProductOffer
+
from sqlalchemy import text
from sqlalchemy.exc import OperationalError
-from argostime import create_app, db
-from argostime.models import ProductOffer, Product
app = create_app()
app.app_context().push()
@@ -36,17 +37,20 @@
logging.info("Adding average_price column")
try:
- db.session.execute(text('ALTER TABLE ProductOffer ADD COLUMN average_price float'))
+ db.session.execute(
+ text('ALTER TABLE ProductOffer ADD COLUMN average_price float'))
except OperationalError:
logging.info("Column already seems to exist, fine")
try:
- db.session.execute(text('ALTER TABLE ProductOffer ADD COLUMN minimum_price float'))
+ db.session.execute(
+ text('ALTER TABLE ProductOffer ADD COLUMN minimum_price float'))
except OperationalError:
logging.info("Column already seems to exist, fine")
try:
- db.session.execute(text('ALTER TABLE ProductOffer ADD COLUMN maximum_price float'))
+ db.session.execute(
+ text('ALTER TABLE ProductOffer ADD COLUMN maximum_price float'))
except OperationalError:
logging.info("Column already seems to exist, fine")
diff --git a/requirements.txt b/requirements.txt
index 4a0adae..3810bfd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ requests>=2.27.1
beautifulsoup4>=4.10.0
Flask-SQLAlchemy>=2.5.1
gunicorn
-SQLAlchemy >= 2
+SQLAlchemy>=2
+pre-commit
diff --git a/requirements_development.txt b/requirements_development.txt
deleted file mode 100644
index a84e007..0000000
--- a/requirements_development.txt
+++ /dev/null
@@ -1,5 +0,0 @@
--r requirements.txt
-
-mypy >= 1.3.0
-pylint >= 2.17.4
-types-requests >= 2.31.0.1
\ No newline at end of file
diff --git a/tests/test_crawler.py b/tests/test_crawler.py
index 36b6cda..d201cb4 100644
--- a/tests/test_crawler.py
+++ b/tests/test_crawler.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
test_crawler.py
@@ -8,11 +7,13 @@
import unittest
-from argostime.crawler import ParseProduct
import argostime.exceptions
+from argostime.crawler import crawl_url
+
class ParseProductTestCases(unittest.TestCase):
def test_not_implemented_website(self):
- with self.assertRaises(argostime.exceptions.WebsiteNotImplementedException):
- ParseProduct("https://example.com")
+ with self.assertRaises(
+ argostime.exceptions.WebsiteNotImplementedException):
+ crawl_url("https://example.com")
diff --git a/tests/test_products.py b/tests/test_products.py
index 8c24e85..cf5656b 100644
--- a/tests/test_products.py
+++ b/tests/test_products.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
"""
test_products.py
@@ -8,11 +7,14 @@
import unittest
-import argostime.products
import argostime.exceptions
+import argostime.products
+
class ProductsTestCases(unittest.TestCase):
def test_not_implemented_website(self):
- with self.assertRaises(argostime.exceptions.WebsiteNotImplementedException):
- argostime.products.add_product_offer_from_url("https://example.com")
+ with self.assertRaises(
+ argostime.exceptions.WebsiteNotImplementedException):
+ argostime.products.add_product_offer_from_url(
+ "https://example.com")
diff --git a/wsgi.py b/wsgi.py
old mode 100644
new mode 100755