Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/qa/1.x' into qa/2.x
Browse files Browse the repository at this point in the history
  • Loading branch information
sevein committed Sep 12, 2024
2 parents ac21c1c + d6b3869 commit 183488e
Show file tree
Hide file tree
Showing 16 changed files with 161 additions and 209 deletions.
2 changes: 0 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ RUN set -ex \
clamav \
coreutils \
ffmpeg \
fits \
g++ \
gcc \
gearman \
Expand All @@ -76,7 +75,6 @@ RUN set -ex \
md5deep \
mediaconch \
mediainfo \
nailgun \
nfs-common \
openjdk-8-jre-headless \
p7zip-full \
Expand Down
89 changes: 43 additions & 46 deletions internal/workflow/assets/workflow.json
Original file line number Diff line number Diff line change
Expand Up @@ -1861,15 +1861,15 @@
"config": {
"@manager": "linkTaskManagerFiles",
"@model": "StandardTaskConfig",
"arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"",
"execute": "FITS_v0.0",
"arguments": "\"%fileUUID%\" \"%SIPUUID%\"",
"execute": "characterizeFile_v0.0",
"filter_subdir": "objects/manualNormalization/preservation"
},
"description": {
"en": "Run FITS on manual normalized preservation files",
"no": "Kjør FITS på manuelt normaliserte bevaringsfiler",
"pt_BR": "Executar o FITS em arquivos de preservação normalizados manuaimente",
"sv": "Kör FITS på manuellt normaliserade bevarandefiler"
"en": "Characterize and extract metadata on manually normalized preservation files",
"no": "Karakteriser og hent ut metadata på manuelt normaliserte bevaringsfiler",
"pt_BR": "Caracterizar e extrair metadados em arquivos de preservação normalizados manualmente",
"sv": "Karaktärisera och extrahera metadata på manuellt normaliserade bevarandefiler"
},
"exit_codes": {
"0": {
Expand All @@ -1882,7 +1882,7 @@
"group": {
"en": "Process manually normalized files",
"es": "Procesar manualmente ficheros normalizados",
"no": "Prosesser normaliserte filer manuelt",
"no": "Prosesser manuelt normaliserte filer",
"pt_BR": "Processar arquivos normalizados manualmente",
"sv": "Bearbeta manuellt normaliserade filer"
}
Expand Down Expand Up @@ -2807,11 +2807,11 @@
"exit_codes": {
"0": {
"job_status": "Completed successfully",
"link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32"
"link_id": "1b1a4565-b501-407b-b40f-2f20889423f1"
}
},
"fallback_job_status": "Failed",
"fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32",
"fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1",
"group": {
"en": "Extract packages",
"es": "Extraer paquetes",
Expand Down Expand Up @@ -7347,7 +7347,7 @@
}
},
"fallback_job_status": "Failed",
"fallback_link_id": "bd382151-afd0-41bf-bb7a-b39aef728a32",
"fallback_link_id": "1b1a4565-b501-407b-b40f-2f20889423f1",
"group": {
"en": "Extract packages",
"es": "Extraer paquetes",
Expand Down Expand Up @@ -8598,41 +8598,6 @@
"sv": "Normalisera"
}
},
"bd382151-afd0-41bf-bb7a-b39aef728a32": {
"config": {
"@manager": "linkTaskManagerFiles",
"@model": "StandardTaskConfig",
"arguments": "\"%relativeLocation%\" \"%SIPLogsDirectory%fileMeta/%fileUUID%.xml\" \"%date%\" \"%taskUUID%\" \"%fileUUID%\" \"%fileGrpUse%\"",
"execute": "FITS_v0.0",
"filter_subdir": "objects/attachments"
},
"description": {
"en": "Characterize and extract metadata for attachments",
"es": "Caracterizar y extraer los metadatos de los adjuntos",
"fr": "Caractériser et extraire les métadonnées pour mettre en pièces jointes",
"ja": "添付ファイルのメタデータの特徴付けと抽出",
"no": "Karakteriser og hent ut metadata fra vedlegg",
"pt_BR": "Caracterizar e extrair metadados para anexos",
"sv": "Karaktärisera och extrahera metadata för bilagor"
},
"exit_codes": {
"0": {
"job_status": "Completed successfully",
"link_id": "1b1a4565-b501-407b-b40f-2f20889423f1"
}
},
"fallback_job_status": "Failed",
"fallback_link_id": "61c316a6-0a50-4f65-8767-1f44b1eeb6dd",
"group": {
"en": "Characterize and extract metadata",
"es": "Caracterizar y extraer metadatos",
"fr": "Caractériser et extraire les métadonnées",
"ja": "メタデータの特徴付けと抽出",
"no": "Karakteriser og hent ut metadata",
"pt_BR": "Caracterizar e extrair metadados",
"sv": "Karaktärisera och extrahera metadata"
}
},
"bd792750-a55b-42e9-903a-8c898bb77df1": {
"config": {
"@manager": "linkTaskManagerDirectories",
Expand Down Expand Up @@ -8830,6 +8795,38 @@
"sv": "Byt namn på SIP-mappen med SIP UUID"
}
},
"bf0ea0f6-211b-4b34-8f25-8a68145403c8": {
"config": {
"@manager": "linkTaskManagerFiles",
"@model": "StandardTaskConfig",
"arguments": "\"True\" \"%relativeLocation%\" \"%fileUUID%\" --disable-reidentify",
"execute": "identifyFileFormat_v0.0",
"filter_subdir": "objects/manualNormalization/preservation"
},
"description": {
"en": "Identify file format",
"es": "Identificar formato de fichero",
"fr": "Identifier le format de fichier",
"no": "Identifiser filformat",
"pt_BR": "Identifique o formato do arquivo",
"sv": "Identifiera filformat"
},
"exit_codes": {
"0": {
"job_status": "Completed successfully",
"link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8"
}
},
"fallback_job_status": "Failed",
"fallback_link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8",
"group": {
"en": "Process manually normalized files",
"es": "Procesar manualmente ficheros normalizados",
"no": "Prosesser manuelt normaliserte filer",
"pt_BR": "Processar arquivos normalizados manualmente",
"sv": "Bearbeta manuellt normaliserade filer"
}
},
"c103b2fb-9a6b-4b68-8112-b70597a6cd14": {
"config": {
"@manager": "linkTaskManagerDirectories",
Expand Down Expand Up @@ -10161,7 +10158,7 @@
"exit_codes": {
"0": {
"job_status": "Completed successfully",
"link_id": "10c40e41-fb10-48b5-9d01-336cd958afe8"
"link_id": "bf0ea0f6-211b-4b34-8f25-8a68145403c8"
}
},
"fallback_job_status": "Failed",
Expand Down
2 changes: 2 additions & 0 deletions worker/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ module = [
"worker.clientScripts.policy_check",
"worker.clientScripts.transcribe_file",
"worker.clientScripts.validate_file",
"worker.utils.executeOrRunSubProcess",
"worker.tests.conftest",
"worker.tests.test_characterize_file",
"worker.tests.test_has_packages",
Expand All @@ -103,6 +104,7 @@ module = [
"worker.tests.test_policy_check",
"worker.tests.test_transcribe_file",
"worker.tests.test_validate_file",
"worker.tests.utils.test_execute_functions",
]
check_untyped_defs = true
disallow_any_generics = true
Expand Down
2 changes: 1 addition & 1 deletion worker/tests/fixtures/reingest-checksum.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"fields": {
"event_type": "message digest calculation",
"event_id": "5a561b24-cc7e-4032-b005-f75de2ec558a",
"agents": [1,2],
"agents": [2,3],
"event_detail": "program=\"python\"; module=\"hashlib.md5()\"",
"file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f",
"event_outcome_detail": "ac63a92ba5a94c337e740d6f189200d0",
Expand Down
2 changes: 1 addition & 1 deletion worker/tests/fixtures/reingest-file-id.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"fields": {
"event_type": "format identification",
"event_id": "44455720-d9a7-43a4-90b6-a9cdf6e5c8cc",
"agents": [1,2,3],
"agents": [2,3],
"event_detail": "program=\"Fido\"; version=\"1.2\"",
"file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f",
"event_outcome_detail": "fmt/9000",
Expand Down
8 changes: 4 additions & 4 deletions worker/tests/fixtures/reingest-preservation.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
"fields": {
"event_type": "normalization",
"event_id": "291f9be4-d19a-4bcc-8e1c-d3f01e4a48b1",
"agents": [1,2,3],
"agents": [2,3],
"event_detail": "ArchivematicaFPRCommandID=\"a34ddc9b-c922-4bb6-8037-bbe713332175\"; program=\"convert\"; version=\"Version: ImageMagick 6.7.7-10 2014-03-06 Q16 http://www.imagemagick.org\"\n",
"file_uuid": "ae8d4290-fe52-4954-b72a-0f591bee2e2f",
"event_outcome_detail": "%SIPDirectory%objects/evelyn_s_photo-d8cc7af7-284a-42f5-b7f4-e181a0efc35f.tif",
Expand Down Expand Up @@ -59,7 +59,7 @@
"fields": {
"event_type": "creation",
"event_id": "a89e6b45-1ac0-49cc-9dda-a4d11ed63f2f",
"agents": [1,2,3],
"agents": [2,3],
"event_detail": "",
"file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f",
"event_outcome_detail": "",
Expand All @@ -73,7 +73,7 @@
"fields": {
"event_type": "message digest calculation",
"event_id": "5c505f21-4e9a-49aa-b7dd-ed699fd4f8ef",
"agents": [1,2,3],
"agents": [2,3],
"event_detail": "program=\"python\"; module=\"hashlib.sha256()\"",
"file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f",
"event_outcome_detail": "d82448f154b9185bc777ecb0a3602760eb76ba85dd3098f073b2c91a03f571e9",
Expand All @@ -87,7 +87,7 @@
"fields": {
"event_type": "fixity check",
"event_id": "94ca2dee-b136-4a74-b477-a0b938bb49e9",
"agents": [1,2,3],
"agents": [2,3],
"event_detail": "program=\"python\"; module=\"hashlib.sha256()\"",
"file_uuid": "d8cc7af7-284a-42f5-b7f4-e181a0efc35f",
"event_outcome_detail": "d82448f154b9185bc777ecb0a3602760eb76ba85dd3098f073b2c91a03f571e9 verified",
Expand Down
1 change: 1 addition & 0 deletions worker/tests/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ def test_normalization_fallbacks_to_default_thumbnail_rule_if_initial_command_fa
fprule_thumbnail: fprmodels.FPRule,
fpcommand_thumbnail: fprmodels.FPCommand,
fprule_default_thumbnail: fprmodels.FPRule,
settings: pytest_django.fixtures.SettingsWrapper,
) -> None:
expected_thumbnail_content = b"thumbnail image content"
expected_thumbnail_path = (
Expand Down
14 changes: 12 additions & 2 deletions worker/tests/test_reingest_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@
class TestUpdateObject(TestCase):
"""Test updating the PREMIS:OBJECT in the techMD. (update_object)."""

fixture_files = ["sip-reingest.json", "files.json", "events-reingest.json"]
fixture_files = [
"agents.json",
"sip-reingest.json",
"files.json",
"events-reingest.json",
]
fixtures = [os.path.join(FIXTURES_DIR, p) for p in fixture_files]

def setUp(self):
Expand Down Expand Up @@ -1840,7 +1845,12 @@ def test_new_preservation_file(self):
class TestDeleteFiles(TestCase):
"""Test marking files in the METS as deleted. (delete_files)"""

fixture_files = ["sip-reingest.json", "files.json", "events-reingest.json"]
fixture_files = [
"agents.json",
"sip-reingest.json",
"files.json",
"events-reingest.json",
]
fixtures = [os.path.join(FIXTURES_DIR, p) for p in fixture_files]

sip_uuid = "4060ee97-9c3f-4822-afaf-ebdf838284c3"
Expand Down
30 changes: 27 additions & 3 deletions worker/tests/utils/test_execute_functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import pathlib
import shlex
import tempfile
from typing import Generator
from unittest.mock import ANY
from unittest.mock import Mock
from unittest.mock import patch

import pytest
Expand All @@ -9,7 +12,7 @@
from worker.utils.executeOrRunSubProcess import launchSubProcess


def test_capture_output():
def test_capture_output() -> None:
"""Tests behaviour of capture_output when executing sub processes."""

# Test that stdout and stderr are not captured by default
Expand Down Expand Up @@ -51,7 +54,7 @@ def test_capture_output():


@pytest.fixture
def temp_path(tmp_path):
def temp_path(tmp_path: pathlib.Path) -> Generator[str, None, None]:
"""Creates custom temp path, yields the value, and resets to original value."""

original_tempdir = tempfile.tempdir
Expand All @@ -63,7 +66,9 @@ def temp_path(tmp_path):


@patch("worker.utils.executeOrRunSubProcess.launchSubProcess")
def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp_path):
def test_createAndRunScript_creates_tmpfile_in_custom_dir(
launchSubProcess: Mock, temp_path: str
) -> None:
"""Tests execution of launchSubProcess when executing createAndRunScript."""

script_content = "#!/bin/bash\necho 'Script output'\nexit 0"
Expand All @@ -79,3 +84,22 @@ def test_createAndRunScript_creates_tmpfile_in_custom_dir(launchSubProcess, temp
)
args, _ = launchSubProcess.call_args
assert args[0][0].startswith(temp_path)


@patch("subprocess.Popen")
def test_launchSubProcess_replaces_non_utf8_output_with_replacement_characters(
    popen: Mock,
) -> None:
    """Check that undecodable bytes in captured output are replaced.

    The patched ``subprocess.Popen`` hands back a fake process whose
    ``communicate()`` yields byte strings ending in ``\\xae``, which is not
    valid UTF-8. The values returned by ``launchSubProcess`` are compared
    against the same bytes decoded with ``errors="replace"``.
    """
    expected_code = 0
    raw_stdout = b"Output \xae"
    raw_stderr = b"Error \xae"

    # Build the fake process step by step instead of through Mock kwargs.
    fake_process = Mock(returncode=expected_code)
    fake_process.communicate.return_value = (raw_stdout, raw_stderr)
    popen.return_value = fake_process

    result = launchSubProcess("mycommand", capture_output=True)
    code, stdout, stderr = result

    assert code == expected_code
    assert stdout == raw_stdout.decode(errors="replace")
    assert stderr == raw_stderr.decode(errors="replace")
1 change: 0 additions & 1 deletion worker/worker/client/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
"archivematicaclamscan_v0.0": "archivematica_clamscan",
"createevent_v0.0": "create_event",
"examinecontents_v0.0": "examine_contents",
"fits_v0.0": "fits",
"identifydspacefiles_v0.0": "identify_dspace_files",
"identifydspacemetsfiles_v0.0": "identify_dspace_mets_files",
"identifyfileformat_v0.0": "identify_file_format",
Expand Down
2 changes: 1 addition & 1 deletion worker/worker/clientScripts/characterize_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# b) Prints the tool's stdout, for tools which do not output XML
#
# If a tool has no defined characterization commands, then the default
# will be run instead (currently FITS).
# will be run instead.
import argparse
import dataclasses
import multiprocessing
Expand Down
Loading

0 comments on commit 183488e

Please sign in to comment.