Skip to content

Commit

Permalink
Addresses:
Browse files Browse the repository at this point in the history
- #32 (http 200 check)
- #30 (integrity field)
- #27 (hash function)

Fixes path issues in Dockerfile.
  • Loading branch information
david-i-berry committed Dec 6, 2024
1 parent 3db05fc commit 4474942
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
11 changes: 6 additions & 5 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ RUN source /home/wis2downloader/.venv/bin/activate && \
USER root
# Now copy files
COPY . /home/wis2downloader/tmp
COPY ./docker/config/. /home/wis2downloader/app/config
COPY ./docker/entrypoint.sh /home/wis2downloader/app/entrypoint.sh
COPY ./docker/clean_downloads.cron /home/wis2downloader/app/clean_downloads.cron
COPY ./docker/clean_downloads.py /home/wis2downloader/app/clean_downloads.py
COPY config/. /home/wis2downloader/app/config
COPY entrypoint.sh /home/wis2downloader/app/entrypoint.sh
COPY clean_downloads.cron /home/wis2downloader/app/clean_downloads.cron
COPY clean_downloads.py /home/wis2downloader/app/clean_downloads.py

# set ownership / permissions
RUN chown -R wis2downloader /home/wis2downloader/tmp && \
Expand All @@ -59,7 +59,8 @@ RUN chown -R wis2downloader /home/wis2downloader/tmp && \
USER wis2downloader
WORKDIR /home/wis2downloader/tmp
RUN source /home/wis2downloader/.venv/bin/activate && \
python -m pip install --no-cache-dir .
python -m pip install wis2downloader

# clean up
WORKDIR /home/wis2downloader/
RUN rm -R /home/wis2downloader/tmp
Expand Down
28 changes: 21 additions & 7 deletions wis2downloader/downloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,9 @@ def process_job(self, job) -> None:

# Get information about the job for verification later
expected_hash, hash_function = self.get_hash_info(job)
expected_size = job.get('payload', {}).get('content', {}).get('size')

# Get the download url, update status, and file type from the job links
_url, update, media_type = self.get_download_url(job)
_url, update, media_type, expected_size = self.get_download_url(job)

if _url is None:
LOGGER.warning(f"No download link found in job {job}")
Expand Down Expand Up @@ -182,6 +181,12 @@ def process_job(self, job) -> None:
response = None
try:
response = self.http.request('GET', _url)
if response.status != 200:
LOGGER.error(f"Error fetching file from {_url}.")
LOGGER.error(f".... Status code: {response.status}")
LOGGER.error(f".... Content: {response.data}")
FAILED_DOWNLOADS.labels(topic=topic, centre_id=centre_id).inc(1)
return
# Get the filesize in bytes
filesize = len(response.data)
except Exception as e:
Expand Down Expand Up @@ -230,16 +235,18 @@ def get_topic_and_centre(self, job) -> tuple:

def get_hash_info(self, job):
expected_hash = job.get('payload', {}).get(
'properties', {}).get('integrity', {}).get('hash')
'properties', {}).get('integrity', {}).get('value')
hash_method = job.get('payload', {}).get(
'properties', {}).get('integrity', {}).get('method')

hash_function = None

# Check if hash method is known using our enumeration of hash methods
if hash_method in VerificationMethods._member_names_:
# get method
method = VerificationMethods[hash_method].value
hash_function = hashlib.new(method)
# load and return from the hashlib library
hash_function = getattr(hashlib, method, None)

return expected_hash, hash_function

Expand All @@ -248,18 +255,21 @@ def get_download_url(self, job) -> tuple:
_url = None
update = False
media_type = None
expected_size = None
for link in links:
if link.get('rel') == 'update':
_url = link.get('href')
media_type = link.get('type')
expected_size = link.get('length')
update = True
break
elif link.get('rel') == 'canonical':
_url = link.get('href')
media_type = link.get('type')
expected_size = link.get('length')
break

return _url, update, media_type
return _url, update, media_type, expected_size

def extract_filename(self, _url) -> tuple:
path = urlsplit(_url).path
Expand All @@ -273,8 +283,12 @@ def validate_data(self, data, expected_hash,
hash_function):
return True

hash_value = hash_function(data).digest()
hash_value = base64.b64encode(hash_value).decode()
try:
hash_value = hash_function(data).digest()
hash_value = base64.b64encode(hash_value).decode()
except Exception as e:
LOGGER.error(e)
return False
if (hash_value != expected_hash) or (len(data) != expected_size):
return False

Expand Down

0 comments on commit 4474942

Please sign in to comment.