Skip to content

Commit

Permalink
Use clamav-client instead of clamd
Browse files (browse the repository at this point in the history)
  • Loading branch information
sevein committed Sep 12, 2024
1 parent 476e00c commit 91b9c34
Show file tree
Hide file tree
Showing 7 changed files with 162 additions and 727 deletions.
4 changes: 2 additions & 2 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,5 @@ worker-test-application *args:
uv run pytest {{args}}
# Run pre-commit.
pre-commit:
uvx pre-commit run --all-files
pre-commit *args:
uvx pre-commit run --all-files {{args}}
42 changes: 14 additions & 28 deletions worker/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies = [
"agentarchives>=0.9.0",
"ammcpc>=0.2.0",
"bagit",
"clamd>=1.0.2",
"clamav-client>=0.5.0",
"django-autoslug>=1.9.9",
"django-tastypie>=0.14.7",
"gearman3",
Expand Down Expand Up @@ -59,9 +59,7 @@ omit = [
"**/wsgi.py",
"**/manage.py",
]
include = [
"**/worker/*",
]
include = ["**/worker/*"]
branch = true

[tool.ruff]
Expand All @@ -72,22 +70,8 @@ target-version = "py38"

[tool.ruff.lint]
# Rule reference: https://docs.astral.sh/ruff/rules/
select = [
"B",
"C4",
"E",
"F",
"I",
"UP",
"W",
]
ignore = [
"B018",
"B904",
"E402",
"E501",
"UP031",
]
select = ["B", "C4", "E", "F", "I", "UP", "W"]
ignore = ["B018", "B904", "E402", "E501", "UP031"]

[tool.ruff.lint.per-file-ignores]
"settings/*" = ["F403"]
Expand All @@ -104,21 +88,23 @@ warn_unused_configs = true
[[tool.mypy.overrides]]
module = [
"worker.client.*",
"worker.clientScripts.archivematica_clamscan",
"worker.clientScripts.characterize_file",
"worker.clientScripts.has_packages",
"worker.clientScripts.identify_file_format",
"worker.clientScripts.normalize",
"worker.clientScripts.policy_check",
"worker.clientScripts.transcribe_file",
"worker.clientScripts.validate_file",
"tests.conftest",
"tests.test_characterize_file",
"tests.test_has_packages",
"tests.test_identify_file_format",
"tests.test_normalize",
"tests.test_policy_check",
"tests.test_transcribe_file",
"tests.test_validate_file",
"worker.tests.conftest",
"worker.tests.test_antivirus",
"worker.tests.test_characterize_file",
"worker.tests.test_has_packages",
"worker.tests.test_identify_file_format",
"worker.tests.test_normalize",
"worker.tests.test_policy_check",
"worker.tests.test_transcribe_file",
"worker.tests.test_validate_file",
]
check_untyped_defs = true
disallow_any_generics = true
Expand Down
211 changes: 19 additions & 192 deletions worker/tests/test_antivirus.py
Original file line number Diff line number Diff line change
@@ -1,199 +1,26 @@
"""Tests for the archivematica_clamscan.py client script."""

from collections import OrderedDict
from collections import namedtuple
from unittest import mock

import pytest

from tests import test_antivirus_clamdscan
from worker.client.job import Job
from worker.clientScripts import archivematica_clamscan


def test_get_scanner(settings):
"""Test that get_scanner returns the correct instance of antivirus
per the user's configuration. Test return of clamdscanner by default."""

# Ensure that environment settings are available to the mock classes.
test_antivirus_clamdscan.setup_clamdscanner(settings)

# Testing to ensure clamscanner is returned when explicitly set.
settings.CLAMAV_CLIENT_BACKEND = "clamscanner"
scanner = archivematica_clamscan.get_scanner()
assert isinstance(scanner, archivematica_clamscan.ClamScanner)

# Testing to ensure that clamdscanner is returned when explicitly set.
settings.CLAMAV_CLIENT_BACKEND = "clamdscanner"
scanner = archivematica_clamscan.get_scanner()
assert isinstance(scanner, archivematica_clamscan.ClamdScanner)

# Testing to ensure that clamdscanner is the default returned scanner.
settings.CLAMAV_CLIENT_BACKEND = "fprot"
scanner = archivematica_clamscan.get_scanner()
assert isinstance(scanner, archivematica_clamscan.ClamdScanner)

# Testing to ensure that clamdscanner is the default returned scanner when
# the user configures an empty string.
settings.CLAMAV_CLIENT_BACKEND = ""
scanner = archivematica_clamscan.get_scanner()
assert isinstance(scanner, archivematica_clamscan.ClamdScanner)

# Testing to ensure that clamdscanner is returned when the environment
# hasn't been configured appropriately and None is returned.
settings.CLAMAV_CLIENT_BACKEND = None
scanner = archivematica_clamscan.get_scanner()
assert isinstance(scanner, archivematica_clamscan.ClamdScanner)

# Testing to ensure that clamdscanner is returned when another variable
# type is specified, e.g. in this instance, an integer.
settings.CLAMAV_CLIENT_BACKEND = 10
scanner = archivematica_clamscan.get_scanner()
assert isinstance(scanner, archivematica_clamscan.ClamdScanner)


args = OrderedDict()
args["file_uuid"] = "ec26199f-72a4-4fd8-a94a-29144b02ddd8"
args["path"] = "/path"
args["date"] = "2019-12-01"
args["task_uuid"] = "c380e94e-7a7b-4ab8-aa72-ec0644cc3f5d"


class FileMock:
def __init__(self, size):
self.size = size


class ScannerMock(archivematica_clamscan.ScannerBase):
PROGRAM = "Mock"

def __init__(self, should_except=False, passed=False):
self.should_except = should_except
self.passed = passed

def scan(self, path):
if self.should_except:
raise Exception("Something really bad happened!")
return self.passed, None, None

def version_attrs(self):
return ("version", "virus_definitions")


def setup_test_scan_file_mocks(
mocker,
file_already_scanned=False,
file_size=1024,
scanner_should_except=False,
scanner_passed=False,
):
deps = namedtuple("deps", ["file_already_scanned", "file_get", "scanner"])(
file_already_scanned=mocker.patch(
"worker.clientScripts.archivematica_clamscan.file_already_scanned",
return_value=file_already_scanned,
),
file_get=mocker.patch(
"worker.main.models.File.objects.get", return_value=FileMock(size=file_size)
),
scanner=ScannerMock(should_except=scanner_should_except, passed=scanner_passed),
)

mocker.patch(
"worker.clientScripts.archivematica_clamscan.get_scanner",
return_value=deps.scanner,
from worker.main.models import File


@pytest.mark.django_db
def test_antivirus(sip_file: File) -> None:
job = mock.Mock(
args=[
"archivematica_clamscan.py",
str(sip_file.pk),
"path",
"date",
"task_uuid_not_used",
],
JobContext=mock.MagicMock(),
spec=Job,
)

return deps


def test_scan_file_already_scanned(mocker):
deps = setup_test_scan_file_mocks(mocker, file_already_scanned=True)

exit_code = archivematica_clamscan.scan_file([], **dict(args))

assert exit_code == 0
deps.file_already_scanned.assert_called_once_with(args["file_uuid"])


QueueEventParams = namedtuple("QueueEventParams", ["scanner_is_None", "passed"])


@pytest.mark.parametrize(
"setup_kwargs, exit_code, queue_event_params",
[
# File size too big for given file_size param
(
{"file_size": 43, "scanner_passed": None},
0,
QueueEventParams(scanner_is_None=None, passed=None),
),
# File size too big for given file_scan param
(
{"file_size": 85, "scanner_passed": None},
0,
QueueEventParams(scanner_is_None=None, passed=None),
),
# File size within given file_size param, and file_scan param
(
{"file_size": 42, "scanner_passed": True},
0,
QueueEventParams(scanner_is_None=False, passed=True),
),
# Scan returns None with no-error, e.g. Broken Pipe
(
{"scanner_passed": None},
0,
QueueEventParams(scanner_is_None=None, passed=None),
),
# Zero byte file passes
(
{"file_size": 0, "scanner_passed": True},
0,
QueueEventParams(scanner_is_None=False, passed=True),
),
# Virus found
(
{"scanner_passed": False},
1,
QueueEventParams(scanner_is_None=False, passed=False),
),
# Passed
(
{"scanner_passed": True},
0,
QueueEventParams(scanner_is_None=False, passed=True),
),
],
)
def test_scan_file(mocker, setup_kwargs, exit_code, queue_event_params, settings):
setup_test_scan_file_mocks(mocker, **setup_kwargs)

# Here the user configurable thresholds for maximum file size, and maximum
# scan size are being tested. The scan size is offset so as to enable the
# test to fall through correctly and eventually return None for
# not-scanned.
settings.CLAMAV_CLIENT_MAX_FILE_SIZE = 42
settings.CLAMAV_CLIENT_MAX_SCAN_SIZE = 84

event_queue = []

ret = archivematica_clamscan.scan_file(event_queue, **dict(args))

# The integer returned by scan_file() is going to be used as the exit code
# of the archivematica_clamscan.py script which is important for the AM
# workflow in order to control what to do next.
assert exit_code == ret

# A side effect of scan_file() is to queue an event to be created in the
# database.
if queue_event_params.passed is None:
assert len(event_queue) == 0
else:
assert len(event_queue) == 1
archivematica_clamscan.call([job])

event = event_queue[0]
assert event["eventType"] == "virus check"
assert event["fileUUID"] == args["file_uuid"]
assert (
event["eventOutcome"] == "Pass"
if setup_kwargs["scanner_passed"]
else "Fail"
)
job.set_status.assert_called_once_with(1)
Loading

0 comments on commit 91b9c34

Please sign in to comment.