From 447494266ee251b686dc8689c1c453bdc2cea01b Mon Sep 17 00:00:00 2001 From: david-i-berry Date: Fri, 6 Dec 2024 10:54:37 +0100 Subject: [PATCH] Addresses: - #32 (http 200 check) - #30 (integrity field) - #27 (hash function) Fixes path issues in Dockerfile. --- docker/Dockerfile | 11 ++++++----- wis2downloader/downloader/__init__.py | 28 ++++++++++++++++++++------- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 18c1392..d296d81 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -44,10 +44,10 @@ RUN source /home/wis2downloader/.venv/bin/activate && \ USER root # Now copy files COPY . /home/wis2downloader/tmp -COPY ./docker/config/. /home/wis2downloader/app/config -COPY ./docker/entrypoint.sh /home/wis2downloader/app/entrypoint.sh -COPY ./docker/clean_downloads.cron /home/wis2downloader/app/clean_downloads.cron -COPY ./docker/clean_downloads.py /home/wis2downloader/app/clean_downloads.py +COPY config/. /home/wis2downloader/app/config +COPY entrypoint.sh /home/wis2downloader/app/entrypoint.sh +COPY clean_downloads.cron /home/wis2downloader/app/clean_downloads.cron +COPY clean_downloads.py /home/wis2downloader/app/clean_downloads.py # set ownership / permisssions RUN chown -R wis2downloader /home/wis2downloader/tmp && \ @@ -59,7 +59,8 @@ RUN chown -R wis2downloader /home/wis2downloader/tmp && \ USER wis2downloader WORKDIR /home/wis2downloader/tmp RUN source /home/wis2downloader/.venv/bin/activate && \ - python -m pip install --no-cache-dir . + python -m pip install wis2downloader + # clean up \ WORKDIR /home/wis2downloader/ RUN rm -R /home/wis2downloader/tmp diff --git a/wis2downloader/downloader/__init__.py b/wis2downloader/downloader/__init__.py index a05717b..8dfb680 100644 --- a/wis2downloader/downloader/__init__.py +++ b/wis2downloader/downloader/__init__.py @@ -135,10 +135,9 @@ def process_job(self, job) -> None: # Get information about the job for verification later expected_hash, hash_function = self.get_hash_info(job) - expected_size = job.get('payload', {}).get('content', {}).get('size') # Get the download url, update status, and file type from the job links - _url, update, media_type = self.get_download_url(job) + _url, update, media_type, expected_size = self.get_download_url(job) if _url is None: LOGGER.warning(f"No download link found in job {job}") @@ -182,6 +181,12 @@ def process_job(self, job) -> None: response = None try: response = self.http.request('GET', _url) + if response.status != 200: + LOGGER.error(f"Error fetching file from {_url}.") + LOGGER.error(f".... Status code: {response.status}") + LOGGER.error(f".... Content: {response.data}") + FAILED_DOWNLOADS.labels(topic=topic, centre_id=centre_id).inc(1) + return # Get the filesize in KB filesize = len(response.data) except Exception as e: @@ -230,7 +235,7 @@ def get_topic_and_centre(self, job) -> tuple: def get_hash_info(self, job): expected_hash = job.get('payload', {}).get( - 'properties', {}).get('integrity', {}).get('hash') + 'properties', {}).get('integrity', {}).get('value') hash_method = job.get('payload', {}).get( 'properties', {}).get('integrity', {}).get('method') @@ -238,8 +243,10 @@ def get_hash_info(self, job): # Check if hash method is known using our enumumeration of hash methods if hash_method in VerificationMethods._member_names_: + # get method method = VerificationMethods[hash_method].value - hash_function = hashlib.new(method) + # load and return from the hashlib library + hash_function = getattr(hashlib, method, None) return expected_hash, hash_function @@ -248,18 +255,21 @@ def get_download_url(self, job) -> tuple: _url = None update = False media_type = None + expected_size = None for link in links: if link.get('rel') == 'update': _url = link.get('href') media_type = link.get('type') + expected_size = link.get('length') update = True break elif link.get('rel') == 'canonical': _url = link.get('href') media_type = link.get('type') + expected_size = link.get('length') break - return _url, update, media_type + return _url, update, media_type, expected_size def extract_filename(self, _url) -> tuple: path = urlsplit(_url).path @@ -273,8 +283,12 @@ def validate_data(self, data, expected_hash, hash_function): return True - hash_value = hash_function(data).digest() - hash_value = base64.b64encode(hash_value).decode() + try: + hash_value = hash_function(data).digest() + hash_value = base64.b64encode(hash_value).decode() + except Exception as e: + LOGGER.error(e) + return False if (hash_value != expected_hash) or (len(data) != expected_size): return False