diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..fc4b161 --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +ignore = E501 +exclude = .git,__pycache__,sip_assembly/migrations diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..6295a14 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: +- repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.5 + hooks: + - id: autopep8 + args: + - --in-place + - --aggressive +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.9 + hooks: + - id: flake8 +- repo: git://github.com/doublify/pre-commit-isort + rev: v4.3.0 + hooks: + - id: isort diff --git a/.travis.yml b/.travis.yml index 68a392f..28382b1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,13 +13,15 @@ before_install: - cp fornax/config.py.example fornax/config.py - docker-compose up -d - sleep 20s - - docker-compose exec fornax-web pip install coverage + - docker-compose exec fornax-web pip install coverage pre-commit + - docker-compose exec fornax-web pre-commit install install: true before_script: - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter - chmod +x ./cc-test-reporter - ./cc-test-reporter before-build script: + - docker-compose exec fornax-web pre-commit run --all-files --show-diff-on-failure - docker-compose exec fornax-web coverage run manage.py test after_script: - docker-compose exec fornax-web coverage xml diff --git a/README.md b/README.md index da2417b..8aa32be 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,6 @@ fornax has six services, all of which are exposed via HTTP endpoints (see [Route * Cleanup - removes files from the destination directory. * Request Cleanup - sends a POST request to another service requesting cleanup of the source directory. fornax only has read access for this directory. - ![SIP Assembly diagram](fornax-services.png) - For an example of the data fornax expects to receive (both bags and JSON), see the `fixtures/` directory diff --git a/docker-compose.yml b/docker-compose.yml index e53ef36..506406e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,6 @@ services: entrypoint: /code/entrypoint.sh volumes: - .:/code - - ~/.am/ss-location-data:/code/archivematica_transfer_source/ ports: - "8003:8003" depends_on: diff --git a/entrypoint.sh b/entrypoint.sh index e41e25c..3e8dd16 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -14,8 +14,5 @@ fi echo "Apply database migrations" python manage.py migrate -echo "Create dirs" -python manage.py shell < make_dirs.py - echo "Starting server" python manage.py runserver 0.0.0.0:8003 diff --git a/fornax-services.png b/fornax-services.png deleted file mode 100644 index b1217c9..0000000 Binary files a/fornax-services.png and /dev/null differ diff --git a/fornax/config.py.example b/fornax/config.py.example index 26b4593..0fedc6c 100644 --- a/fornax/config.py.example +++ b/fornax/config.py.example @@ -15,11 +15,8 @@ ALLOWED_HOSTS = ['fornax-web', 'localhost'] BASE_DIR = '' SRC_DIR = '{}/src'.format(BASE_DIR) -TEST_SRC_DIR = '{}/src_test'.format(BASE_DIR) TMP_DIR = '{}/tmp'.format(BASE_DIR) -TEST_TMP_DIR = '{}/tmp_test'.format(BASE_DIR) DEST_DIR = '{}/dest'.format(BASE_DIR) -TEST_DEST_DIR = '{}/dest_test'.format(BASE_DIR) PROCESSING_CONFIG_DIR = 'processing_configs' PROCESSING_CONFIG = 'processingMCP.xml' @@ -51,4 +48,6 @@ ARCHIVEMATICA = { } } -STATIC_ROOT = '/static' +STATIC_ROOT = "/static" + +CLEANUP_URL = "http://ursa-major-web:8005/cleanup/" diff --git a/fornax/settings.py b/fornax/settings.py index 1d9f153..0cceb65 100644 --- a/fornax/settings.py +++ b/fornax/settings.py @@ -11,6 +11,7 @@ """ import os + from fornax import config as CF # Build paths inside the project like this: os.path.join(BASE_DIR, ...) @@ -40,6 +41,7 @@ 'sip_assembly', 'rest_framework', 'health_check', + 'asterism', ] MIDDLEWARE = [ @@ -115,14 +117,12 @@ STATIC_ROOT = CF.STATIC_ROOT SRC_DIR = CF.SRC_DIR -TEST_SRC_DIR = CF.TEST_SRC_DIR TMP_DIR = CF.TMP_DIR -TEST_TMP_DIR = CF.TEST_TMP_DIR DEST_DIR = CF.DEST_DIR -TEST_DEST_DIR = CF.TEST_DEST_DIR PROCESSING_CONFIG_DIR = CF.PROCESSING_CONFIG_DIR PROCESSING_CONFIG = CF.PROCESSING_CONFIG ARCHIVEMATICA = CF.ARCHIVEMATICA +CLEANUP_URL = CF.CLEANUP_URL REST_FRAMEWORK = { 'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.PageNumberPagination', diff --git a/fornax/urls.py b/fornax/urls.py index 2da309e..6c6318f 100644 --- a/fornax/urls.py +++ b/fornax/urls.py @@ -15,16 +15,12 @@ """ from django.conf.urls import url from django.urls import include -from sip_assembly.views import ( - SIPViewSet, - SIPAssemblyView, - CreatePackageView, - RemoveCompletedTransfersView, - RemoveCompletedIngestsView, - CleanupRoutineView, - CleanupRequestView) from rest_framework import routers from rest_framework.schemas import get_schema_view +from sip_assembly.views import (CleanupRequestView, CleanupRoutineView, + CreatePackageView, RemoveCompletedIngestsView, + RemoveCompletedTransfersView, SIPAssemblyView, + SIPViewSet) router = routers.DefaultRouter() router.register(r'sips', SIPViewSet) diff --git a/make_dirs.py b/make_dirs.py deleted file mode 100644 index d508b41..0000000 --- a/make_dirs.py +++ /dev/null @@ -1,11 +0,0 @@ -import os -from fornax import settings - -""" -This file is called by entrypoint.sh (when running this application in a -container) to ensure that the necessary directories exist. -""" - -for dir in [settings.SRC_DIR, settings.TMP_DIR, settings.DEST_DIR]: - if not os.path.isdir(dir): - os.makedirs(dir) diff --git a/requirements.txt b/requirements.txt index a0ad5a7..68baad5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ amclient==1.0.0 +asterism==0.5.2 bagit==1.7.0 certifi==2019.11.28 chardet==3.0.4 @@ -6,9 +7,8 @@ click==7.0 clinner==1.12.3 colorlog==4.0.2 csvvalidator==1.2 -Django==2.2.8 +Django==2.2.10 djangorestframework==3.10.3 -git+https://github.com/RockefellerArchiveCenter/asterism@v0.1#egg=asterism gitdb2==2.0.6 GitPython==3.0.5 health-check==3.4.1 diff --git a/sip_assembly/admin.py b/sip_assembly/admin.py index 8c38f3f..e69de29 100644 --- a/sip_assembly/admin.py +++ b/sip_assembly/admin.py @@ -1,3 +0,0 @@ -from django.contrib import admin - -# Register your models here. diff --git a/sip_assembly/migrations/0001_initial.py b/sip_assembly/migrations/0001_initial.py index 6c1962f..0ceb265 100644 --- a/sip_assembly/migrations/0001_initial.py +++ b/sip_assembly/migrations/0001_initial.py @@ -15,13 +15,34 @@ class Migration(migrations.Migration): migrations.CreateModel( name='SIP', fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('process_status', models.CharField(choices=[(10, 'New SIP created'), (20, 'SIP files moved to processing'), (30, 'SIP validated according to BagIt'), (30, 'SIP restructured'), (40, 'PREMIS CSV rights added to SIP'), (50, 'Submission documentation added to SIP'), (60, 'SIP bag-info.txt updated'), (70, 'SIP Manifests updated'), (90, 'SIP Delivered to Archivematica Transfer Source')], max_length=100)), + ('id', models.AutoField(auto_created=True, + primary_key=True, serialize=False, verbose_name='ID')), + ('process_status', + models.CharField(choices=[(10, + 'New SIP created'), + (20, + 'SIP files moved to processing'), + (30, + 'SIP validated according to BagIt'), + (30, + 'SIP restructured'), + (40, + 'PREMIS CSV rights added to SIP'), + (50, + 'Submission documentation added to SIP'), + (60, + 'SIP bag-info.txt updated'), + (70, + 'SIP Manifests updated'), + (90, + 'SIP Delivered to Archivematica Transfer Source')], + max_length=100)), ('bag_path', models.CharField(max_length=100)), ('bag_identifier', models.CharField(max_length=255, unique=True)), ('created', models.DateTimeField(auto_now=True)), ('last_modified', models.DateTimeField(auto_now_add=True)), - ('data', django.contrib.postgres.fields.jsonb.JSONField(blank=True, null=True)), + ('data', django.contrib.postgres.fields.jsonb.JSONField( + blank=True, null=True)), ], ), ] diff --git a/sip_assembly/migrations/0002_auto_20180910_1744.py b/sip_assembly/migrations/0002_auto_20180910_1744.py index d18dfbf..75395c8 100644 --- a/sip_assembly/migrations/0002_auto_20180910_1744.py +++ b/sip_assembly/migrations/0002_auto_20180910_1744.py @@ -13,6 +13,28 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='sip', name='process_status', - field=models.CharField(choices=[(10, 'New SIP created'), (20, 'SIP files moved to processing'), (30, 'SIP validated according to BagIt'), (30, 'SIP restructured'), (40, 'PREMIS CSV rights added to SIP'), (50, 'Submission documentation added to SIP'), (60, 'SIP bag-info.txt updated'), (70, 'Archivematica processing config added'), (80, 'SIP Manifests updated'), (90, 'SIP Delivered to Archivematica Transfer Source')], max_length=100), + field=models.CharField( + choices=[ + (10, + 'New SIP created'), + (20, + 'SIP files moved to processing'), + (30, + 'SIP validated according to BagIt'), + (30, + 'SIP restructured'), + (40, + 'PREMIS CSV rights added to SIP'), + (50, + 'Submission documentation added to SIP'), + (60, + 'SIP bag-info.txt updated'), + (70, + 'Archivematica processing config added'), + (80, + 'SIP Manifests updated'), + (90, + 'SIP Delivered to Archivematica Transfer Source')], + max_length=100), ), ] diff --git a/sip_assembly/migrations/0003_auto_20181112_2040.py b/sip_assembly/migrations/0003_auto_20181112_2040.py index 16ae488..d2b38e7 100644 --- a/sip_assembly/migrations/0003_auto_20181112_2040.py +++ b/sip_assembly/migrations/0003_auto_20181112_2040.py @@ -13,6 +13,16 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='sip', name='process_status', - field=models.CharField(choices=[(10, 'New SIP created'), (20, 'SIP assembled and delivered to Archivematica'), (30, 'SIP started in Archivematica'), (40, 'SIP approved in Archivematica')], max_length=100), + field=models.CharField( + choices=[ + (10, + 'New SIP created'), + (20, + 'SIP assembled and delivered to Archivematica'), + (30, + 'SIP started in Archivematica'), + (40, + 'SIP approved in Archivematica')], + max_length=100), ), ] diff --git a/sip_assembly/migrations/0004_auto_20181118_2151.py b/sip_assembly/migrations/0004_auto_20181118_2151.py index 998e6b5..33bfeb2 100644 --- a/sip_assembly/migrations/0004_auto_20181118_2151.py +++ b/sip_assembly/migrations/0004_auto_20181118_2151.py @@ -13,6 +13,18 @@ class Migration(migrations.Migration): migrations.AlterField( model_name='sip', name='process_status', - field=models.CharField(choices=[(10, 'New SIP created'), (20, 'SIP assembled and delivered to Archivematica'), (30, 'SIP started in Archivematica'), (40, 'SIP approved in Archivematica'), (50, 'SIP removed from src directory')], max_length=100), + field=models.CharField( + choices=[ + (10, + 'New SIP created'), + (20, + 'SIP assembled and delivered to Archivematica'), + (30, + 'SIP started in Archivematica'), + (40, + 'SIP approved in Archivematica'), + (50, + 'SIP removed from src directory')], + max_length=100), ), ] diff --git a/sip_assembly/migrations/0005_sip_origin.py b/sip_assembly/migrations/0005_sip_origin.py index ecb44f5..048edbd 100644 --- a/sip_assembly/migrations/0005_sip_origin.py +++ b/sip_assembly/migrations/0005_sip_origin.py @@ -13,6 +13,15 @@ class Migration(migrations.Migration): migrations.AddField( model_name='sip', name='origin', - field=models.CharField(choices=[('aurora', 'Aurora'), ('legacy_digital', 'Legacy Digital Processing'), ('digitization', 'Digitization')], default='aurora', max_length=20), + field=models.CharField( + choices=[ + ('aurora', + 'Aurora'), + ('legacy_digital', + 'Legacy Digital Processing'), + ('digitization', + 'Digitization')], + default='aurora', + max_length=20), ), ] diff --git a/sip_assembly/migrations/0006_auto_20200317_0220.py b/sip_assembly/migrations/0006_auto_20200317_0220.py new file mode 100644 index 0000000..c4ed677 --- /dev/null +++ b/sip_assembly/migrations/0006_auto_20200317_0220.py @@ -0,0 +1,38 @@ +# Generated by Django 2.2.10 on 2020-03-17 02:20 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('sip_assembly', '0005_sip_origin'), + ] + + operations = [ + migrations.AddField( + model_name='sip', + name='type', + field=models.CharField(blank=True, choices=[('aip', 'Archival Information Package'), ('dip', 'Dissemination Information Package')], max_length=50, null=True), + ), + migrations.AlterField( + model_name='sip', + name='bag_path', + field=models.CharField(blank=True, max_length=255, null=True), + ), + migrations.AlterField( + model_name='sip', + name='created', + field=models.DateTimeField(auto_now_add=True), + ), + migrations.AlterField( + model_name='sip', + name='last_modified', + field=models.DateTimeField(auto_now=True), + ), + migrations.AlterField( + model_name='sip', + name='process_status', + field=models.IntegerField(), + ), + ] diff --git a/sip_assembly/models.py b/sip_assembly/models.py index 2bbf9c7..507450a 100644 --- a/sip_assembly/models.py +++ b/sip_assembly/models.py @@ -1,8 +1,7 @@ -from django.contrib.postgres.fields import JSONField -from django.db import models +from asterism.models import BasePackage -class SIP(models.Model): +class SIP(BasePackage): CREATED = 10 ASSEMBLED = 20 STARTED = 30 @@ -15,15 +14,3 @@ class SIP(models.Model): (APPROVED, "SIP approved in Archivematica"), (CLEANED_UP, "SIP removed from src directory") ) - process_status = models.CharField(max_length=100, choices=PROCESS_STATUS_CHOICES) - bag_path = models.CharField(max_length=100) - bag_identifier = models.CharField(max_length=255, unique=True) - created = models.DateTimeField(auto_now=True) - last_modified = models.DateTimeField(auto_now_add=True) - data = JSONField(null=True, blank=True) - ORIGIN_CHOICES = ( - ('aurora', 'Aurora'), - ('legacy_digital', 'Legacy Digital Processing'), - ('digitization', 'Digitization') - ) - origin = models.CharField(max_length=20, choices=ORIGIN_CHOICES, default='aurora') diff --git a/sip_assembly/routines.py b/sip_assembly/routines.py index 355a473..9ab7a72 100644 --- a/sip_assembly/routines.py +++ b/sip_assembly/routines.py @@ -2,11 +2,12 @@ from os import remove from os.path import isdir, isfile, join -from amclient import AMClient, errors import requests - +from amclient import AMClient, errors +from asterism import bagit_helpers from fornax import settings from sip_assembly import routines_helpers as helpers + from .models import SIP @@ -45,11 +46,11 @@ def get_processing_config(self, client): class SIPAssembler(ArchivematicaRoutine): """Creates an Archivematica-compliant SIP.""" - def __init__(self, dirs=None): + def __init__(self): super(SIPAssembler, self).__init__() - self.src_dir = dirs['src'] if dirs else settings.SRC_DIR - self.tmp_dir = dirs['tmp'] if dirs else settings.TMP_DIR - self.dest_dir = dirs['dest'] if dirs else settings.DEST_DIR + self.src_dir = settings.SRC_DIR + self.tmp_dir = settings.TMP_DIR + self.dest_dir = settings.DEST_DIR for dir in [self.src_dir, self.tmp_dir, self.dest_dir]: if not isdir(dir): raise SIPAssemblyError("Directory does not exist", dir) @@ -61,7 +62,7 @@ def run(self): try: helpers.copy_to_directory(sip, self.tmp_dir) helpers.extract_all(sip, self.tmp_dir) - helpers.validate(sip.bag_path) + bagit_helpers.validate(sip.bag_path) except Exception as e: raise SIPAssemblyError( "Error moving SIP to processing directory: {}".format(e), @@ -86,12 +87,12 @@ def run(self): sip.bag_identifier) try: - helpers.update_bag_info( + bagit_helpers.update_bag_info( sip.bag_path, { 'Internal-Sender-Identifier': sip.bag_identifier}) helpers.add_processing_config( sip.bag_path, self.get_processing_config(client)) - helpers.update_manifests(sip.bag_path) + bagit_helpers.update_manifests(sip.bag_path) helpers.create_targz_package(sip) except Exception as e: raise SIPAssemblyError( @@ -168,14 +169,11 @@ class CleanupRequester: another service. """ - def __init__(self, url): - self.url = url - def run(self): sip_ids = [] for sip in SIP.objects.filter(process_status=SIP.APPROVED): r = requests.post( - self.url, + settings.CLEANUP_URL, data=json.dumps({"identifier": sip.bag_identifier}), headers={"Content-Type": "application/json"}, ) @@ -191,9 +189,9 @@ def run(self): class CleanupRoutine: """Removes files in destination directory.""" - def __init__(self, identifier, dirs): + def __init__(self, identifier): self.identifier = identifier - self.dest_dir = dirs['dest'] if dirs else settings.DEST_DIR + self.dest_dir = settings.DEST_DIR if not self.identifier: raise CleanupError( "No identifier submitted, unable to perform CleanupRoutine.",) diff --git a/sip_assembly/routines_helpers.py b/sip_assembly/routines_helpers.py index 4554607..bcb2372 100644 --- a/sip_assembly/routines_helpers.py +++ b/sip_assembly/routines_helpers.py @@ -1,39 +1,36 @@ -import bagit import csv -from csvvalidator import CSVValidator, RecordError, enumeration import datetime import os -import shutil -import tarfile + +from asterism import file_helpers +from csvvalidator import CSVValidator, RecordError, enumeration def copy_to_directory(sip, dest): """Moves a bag to the `dest` directory and updates the object's bag_path.""" - shutil.copyfile( - sip.bag_path, os.path.join( - dest, "{}.tar.gz".format( - sip.bag_identifier))) - sip.bag_path = os.path.join(dest, "{}.tar.gz".format(sip.bag_identifier)) - sip.save() + dest_path = os.path.join(dest, "{}.tar.gz".format(sip.bag_identifier)) + copied = file_helpers.copy_file_or_dir(sip.bag_path, dest_path) + if copied: + sip.bag_path = dest_path + sip.save() def move_to_directory(sip, dest): """Moves a bag to the `dest` directory and updates the object's bag_path""" - shutil.move( - sip.bag_path, os.path.join( - dest, "{}.tar.gz".format( - sip.bag_identifier))) - sip.bag_path = os.path.join(dest, "{}.tar.gz".format(sip.bag_identifier)) - sip.save() + dest_path = os.path.join(dest, "{}.tar.gz".format(sip.bag_identifier)) + moved = file_helpers.move_file_or_dir(sip.bag_path, dest_path) + if moved: + sip.bag_path = os.path.join(dest_path) + sip.save() def extract_all(sip, extract_dir): """Extracts a tar.gz file to the `extract dir` directory""" ext = os.path.splitext(sip.bag_path)[-1] if ext in ['.tgz', '.tar.gz', '.gz']: - tf = tarfile.open(sip.bag_path, 'r') - tf.extractall(extract_dir) - tf.close() + extracted = file_helpers.tar_extract_all(sip.bag_path, extract_dir) + if not extracted: + raise Exception("Error extracting TAR file.") os.remove(sip.bag_path) sip.bag_path = os.path.join(extract_dir, sip.bag_identifier) sip.save() @@ -52,12 +49,6 @@ def move_objects_dir(bag_path): os.rename(os.path.join(src, fname), os.path.join(dest, fname)) -def validate(bag_path): - """Validates a bag against the BagIt specification""" - bag = bagit.Bag(bag_path) - return bag.validate() - - def create_structure(bag_path): """Creates Archivematica-compliant directory structure within a bag""" log_dir = os.path.join(bag_path, 'data', 'logs') @@ -111,7 +102,9 @@ def write_rights_row(bag_dir, filenames, rights_statement, csvwriter): for file in filenames: for rights_granted in rights_statement.get('rights_granted'): csvwriter.writerow( - [os.path.join(bag_dir, file).lstrip('/'), rights_statement.get('rights_basis', ''), rights_statement.get('status', ''), + [os.path.join(bag_dir, file).lstrip('/'), + rights_statement.get('rights_basis', ''), + rights_statement.get('status', ''), rights_statement.get( 'determination_date', ''), rights_statement.get( 'jurisdiction', ''), @@ -195,31 +188,16 @@ def create_submission_docs(sip): return True -def update_bag_info(bag_path, data): - """Adds metadata to `bag-info.txt`""" - bag = bagit.Bag(bag_path) - for k, v in data.items(): - bag.info[k] = v - bag.save() - - def add_processing_config(bag_path, data): """Adds pre-defined Archivematica processing configuration file""" with open(os.path.join(bag_path, 'processingMCP.xml'), 'w') as f: f.write(data) -def update_manifests(bag_path): - """Updates bag manifests according to BagIt specification""" - bag = bagit.Bag(bag_path) - bag.save(manifests=True) - - def create_targz_package(sip): """Creates a compressed archive file from a bag""" - with tarfile.open('{}.tar.gz'.format(sip.bag_path), "w:gz") as tar: - tar.add(sip.bag_path, arcname=os.path.basename(sip.bag_path)) - tar.close() - shutil.rmtree(sip.bag_path) - sip.bag_path = '{}.tar.gz'.format(sip.bag_path) + tar_path = "{}.tar.gz".format(sip.bag_path) + file_helpers.make_tarfile( + sip.bag_path, tar_path, compressed=True, remove_src=True) + sip.bag_path = tar_path sip.save() diff --git a/sip_assembly/serializers.py b/sip_assembly/serializers.py index 658b37d..19634dd 100644 --- a/sip_assembly/serializers.py +++ b/sip_assembly/serializers.py @@ -6,11 +6,24 @@ class SIPSerializer(serializers.HyperlinkedModelSerializer): class Meta: model = SIP - fields = ('url', 'bag_identifier', 'bag_path', 'process_status', 'data', 'created', 'last_modified') + fields = ( + 'url', + 'bag_identifier', + 'bag_path', + 'process_status', + 'data', + 'created', + 'last_modified') class SIPListSerializer(serializers.HyperlinkedModelSerializer): class Meta: model = SIP - fields = ('url', 'bag_identifier', 'bag_path', 'process_status', 'created', 'last_modified') + fields = ( + 'url', + 'bag_identifier', + 'bag_path', + 'process_status', + 'created', + 'last_modified') diff --git a/sip_assembly/templates/sip_assembly/base.html b/sip_assembly/templates/sip_assembly/base.html deleted file mode 100644 index 831aaac..0000000 --- a/sip_assembly/templates/sip_assembly/base.html +++ /dev/null @@ -1,27 +0,0 @@ - - -
- {% include 'sip_assembly/head.html' %} - {% block extra_css %}{% endblock %} - - -