Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add test for JHOVE module validation #216

Merged
merged 9 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions features/black_box/validation.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
@black-box
Feature: Alma wants to ensure that JHOVE validation in Archivematica works correctly when files are validated, and that transfers fail when they contain unvalidated files.

Scenario: Validation for a transfer fails
Given a "standard" transfer type located in "SampleTransfers/JHOVEModulesValidation"
When the transfer compliance is verified
Then the "Identify file format" microservice completes successfully
And the "Validate formats" job fails
And 17 "Validate formats" transfer tasks were executed
And 8 "Validate formats" transfer tasks failed
And 9 "Validate formats" transfer tasks succeeded
And the "Validation" microservice is executed
And 1 AIFF file(s) failed
And 1 AIFF file(s) succeeded
And 1 GIF file(s) failed
And 1 GIF file(s) succeeded
And 1 JP2 file(s) failed
And 1 JP2 file(s) succeeded
And 1 JPG file(s) failed
And 1 JPG file(s) succeeded
And 1 PDF file(s) failed
And 1 PDF file(s) succeeded
And 1 TIF file(s) failed
And 1 TIF file(s) succeeded
And 1 WARC file(s) failed
And 1 WARC file(s) succeeded
And 1 WAV file(s) failed
And 1 WAV file(s) succeeded
And the AIP can be successfully stored
And there are 16 original objects in the AIP METS with a validation event


89 changes: 89 additions & 0 deletions features/steps/black_box_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""

import os
import pathlib

import metsrw
from behave import given
Expand Down Expand Up @@ -1527,3 +1528,91 @@ def step(context):
raise AssertionError(error)
else:
assert uses_order_indexes == sorted(uses_order_indexes), error


def get_unit_uuid(context, unit_type):
    """Return the UUID of the current unit for the given unit type.

    The transfer UUID is returned when ``unit_type`` is exactly
    ``"transfer"``; any other value selects the SIP UUID. Both values are
    read from ``context.current_transfer``.
    """
    uuid_key = "transfer_uuid" if unit_type == "transfer" else "sip_uuid"
    return context.current_transfer[uuid_key]


@then('{task_count:d} "{job_name}" {unit_type} tasks were executed')
def step_impl(context, task_count, job_name, unit_type):
    """Check how many tasks ran across the named job(s) of the current unit.

    The jobs are fetched through ``utils.get_jobs`` for the unit selected by
    ``unit_type``, and the step fails if none are returned or if the summed
    number of tasks differs from ``task_count``.
    """
    unit_uuid = get_unit_uuid(context, unit_type)
    matching_jobs = utils.get_jobs(
        context.api_clients_config,
        unit_uuid,
        job_name=job_name,
    )
    assert len(matching_jobs), f"No jobs found for unit {unit_uuid}"

    # Several jobs may share the same name, so tasks are totalled over all.
    task_size = sum(len(job["tasks"]) for job in matching_jobs)
    assert task_size == task_count, (
        f"Expected {task_count} tasks to be executed for unit {unit_uuid}, got {task_size} instead."
    )


@then('{task_count:d} "{job_name}" {unit_type} tasks failed')
def step_impl(context, task_count, job_name, unit_type):
    """Assert that exactly ``task_count`` tasks of the named job(s) failed.

    A task is counted as failed when its ``exit_code`` equals 1. The step
    fails if no jobs are returned for the unit or if the number of failed
    tasks differs from ``task_count``.
    """
    unit_uuid = get_unit_uuid(context, unit_type)
    jobs = utils.get_jobs(context.api_clients_config, unit_uuid, job_name=job_name)
    assert len(jobs), f"No jobs found for unit {unit_uuid}"

    # NOTE(review): only exit code 1 is counted here, while the per-file
    # "file(s) {status}" step in this module also treats 179 as a failure;
    # confirm whether 179 should be included.
    fail_task = 0
    for job in jobs:
        for task in job["tasks"]:
            if task["exit_code"] == 1:
                fail_task += 1
    # The message reports the requested count as "expected" and the observed
    # count as "found" (the original had the two placeholders swapped).
    assert fail_task == task_count, (
        f"Expected {task_count} failed tasks for unit {unit_uuid}, but found {fail_task}."
    )


@then('{task_count:d} "{job_name}" {unit_type} tasks succeeded')
def step_impl(context, task_count, job_name, unit_type):
    """Assert that exactly ``task_count`` tasks of the named job(s) succeeded.

    A task is counted as successful when its ``exit_code`` equals 0. The
    step fails if no jobs are returned for the unit or if the number of
    successful tasks differs from ``task_count``.
    """
    unit_uuid = get_unit_uuid(context, unit_type)
    jobs = utils.get_jobs(context.api_clients_config, unit_uuid, job_name=job_name)
    assert len(jobs), f"No jobs found for unit {unit_uuid}"

    success_task = 0
    for job in jobs:
        for task in job["tasks"]:
            if task["exit_code"] == 0:
                success_task += 1
    # The message reports the requested count as "expected" and the observed
    # count as "found" (the original had the two placeholders swapped).
    assert success_task == task_count, (
        f"Expected {task_count} successful tasks for unit {unit_uuid}, but found {success_task}."
    )


@then("{file_count:d} {file_extension} file(s) {status}")
def step_impl(context, file_count, file_extension, status):
    """Check how many files of one extension failed or passed validation.

    Fetches the "Validate formats" jobs of the "Validation" microservice for
    the current transfer with detailed task output, then counts the tasks
    whose file name has the requested extension (case-insensitive) and whose
    exit code belongs to the requested status.
    """
    unit_uuid = context.current_transfer["transfer_uuid"]
    jobs = utils.get_jobs(
        context.api_clients_config,
        unit_uuid,
        job_name="Validate formats",
        job_microservice="Validation",
        detailed_task=True,
    )
    assert len(jobs), f"No jobs found for unit {unit_uuid}"

    # Exit codes that this step accepts for each status word.
    status_to_task_exit_codes = {
        "failed": (1, 179),
        "succeeded": (0,),
    }
    assert status in status_to_task_exit_codes, (
        f"The requested status {status} does not match the task exit code for the unit {unit_uuid}."
    )
    expected_exit_codes = status_to_task_exit_codes[status]

    # pathlib suffixes include the leading dot, so build the target once.
    wanted_suffix = f".{file_extension.lower()}"

    total = sum(
        1
        for job in jobs
        for task in job["tasks"]
        if pathlib.Path(task["file_name"]).suffix.lower() == wanted_suffix
        and task["exit_code"] in expected_exit_codes
    )
    assert file_count == total, (
        f"Expected {file_count} {file_extension} file(s) {status} during file format validation for unit {unit_uuid}, but got {total} instead."
    )
3 changes: 3 additions & 0 deletions features/steps/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ def get_jobs(
job_microservice=None,
job_link_uuid=None,
job_name=None,
detailed_task=None,
):
am = configure_am_client(api_clients_config[AM_API_CONFIG_KEY])
am.unit_uuid = unit_uuid
Expand All @@ -880,6 +881,8 @@ def get_jobs(
am.job_link_uuid = job_link_uuid
if job_name is not None:
am.job_name = job_name
if detailed_task is not None:
am.detailed = detailed_task
return call_api_endpoint(
endpoint=am.get_jobs,
warning_message="Cannot check job status",
Expand Down
73 changes: 37 additions & 36 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,29 @@
#
# pip-compile --allow-unsafe --output-file=requirements-dev.txt requirements-dev.in
#
amclient==1.4.0
# via -r requirements.txt
amclient @ git+https://github.com/artefactual-labs/amclient.git@b34ae76a584b5014d24adbc21067e3d0218a2a93
# via -r /src/requirements.txt
attrs==25.1.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# outcome
# trio
behave==1.2.6
# via -r requirements.txt
# via -r /src/requirements.txt
build==1.2.2.post1
# via pip-tools
cachetools==5.5.1
# via tox
certifi==2025.1.31
# via
# -r requirements.txt
# -r /src/requirements.txt
# requests
# selenium
chardet==5.2.0
# via tox
charset-normalizer==3.4.1
# via
# -r requirements.txt
# -r /src/requirements.txt
# requests
click==8.1.8
# via pip-tools
Expand All @@ -36,7 +36,7 @@ distlib==0.3.9
# via virtualenv
exceptiongroup==1.2.2
# via
# -r requirements.txt
# -r /src/requirements.txt
# trio
# trio-websocket
filelock==3.17.0
Expand All @@ -45,41 +45,42 @@ filelock==3.17.0
# virtualenv
h11==0.14.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# wsproto
idna==3.10
# via
# -r requirements.txt
# -r /src/requirements.txt
# requests
# trio
importlib-metadata==8.6.1
# via build
lxml==5.3.0
lxml==5.3.1
# via
# -r requirements.txt
# -r /src/requirements.txt
# metsrw
metsrw==0.6.1
# via -r requirements.txt
# via -r /src/requirements.txt
outcome==1.3.0.post0
# via
# -r requirements.txt
# -r /src/requirements.txt
# trio
# trio-websocket
packaging==24.2
# via
# build
# pyproject-api
# tox
parse==1.20.2
# via
# -r requirements.txt
# -r /src/requirements.txt
# behave
# parse-type
parse-type==0.6.4
# via
# -r requirements.txt
# -r /src/requirements.txt
# behave
pexpect==4.9.0
# via -r requirements.txt
# via -r /src/requirements.txt
pip-tools==7.4.1
# via -r requirements-dev.in
platformdirs==4.3.6
Expand All @@ -90,7 +91,7 @@ pluggy==1.5.0
# via tox
ptyprocess==0.7.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# pexpect
pyproject-api==1.9.0
# via tox
Expand All @@ -100,31 +101,31 @@ pyproject-hooks==1.2.0
# pip-tools
pysocks==1.7.1
# via
# -r requirements.txt
# -r /src/requirements.txt
# urllib3
requests==2.32.3
# via
# -r requirements.txt
# -r /src/requirements.txt
# amclient
ruff==0.9.5
ruff==0.9.6
# via -r requirements-dev.in
selenium==4.28.1
# via -r requirements.txt
selenium==4.29.0
# via -r /src/requirements.txt
six==1.17.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# behave
# parse-type
sniffio==1.3.1
# via
# -r requirements.txt
# -r /src/requirements.txt
# trio
sortedcontainers==2.4.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# trio
tenacity==9.0.0
# via -r requirements.txt
# via -r /src/requirements.txt
tomli==2.2.1
# via
# build
Expand All @@ -133,43 +134,43 @@ tomli==2.2.1
# tox
tox==4.24.1
# via -r requirements-dev.in
trio==0.28.0
trio==0.29.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# selenium
# trio-websocket
trio-websocket==0.11.1
trio-websocket==0.12.1
# via
# -r requirements.txt
# -r /src/requirements.txt
# selenium
typing-extensions==4.12.2
# via
# -r requirements.txt
# -r /src/requirements.txt
# selenium
# tox
urllib3[socks]==2.3.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# amclient
# requests
# selenium
virtualenv==20.29.1
virtualenv==20.29.2
# via tox
websocket-client==1.8.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# selenium
wheel==0.45.1
# via pip-tools
wsproto==1.2.0
# via
# -r requirements.txt
# -r /src/requirements.txt
# trio-websocket
zipp==3.21.0
# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
pip==25.0
pip==25.0.1
# via pip-tools
setuptools==75.8.0
# via pip-tools
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
amclient
git+https://github.com/artefactual-labs/amclient.git@b34ae76a584b5014d24adbc21067e3d0218a2a93#egg=amclient
behave
lxml
metsrw
Expand Down
Loading