Skip to content

Commit

Permalink
Merge pull request #40 from RockefellerArchiveCenter/debug
Browse files Browse the repository at this point in the history
Improves error handling
  • Loading branch information
helrond authored Dec 3, 2018
2 parents c6a57e3 + 7206d8a commit cf0f9ba
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 118 deletions.
19 changes: 14 additions & 5 deletions sip_assembly/assemblers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@


class SIPAssemblyError(Exception):
    """Error raised while setting up or running SIP assembly."""


class SIPActionError(Exception):
    """Error raised when starting or approving a transfer in Archivematica fails."""


class CleanupError(Exception):
    """Error type for cleanup failures (where it is raised is not shown here)."""


Expand All @@ -31,6 +30,16 @@ def __init__(self, dirs=None):
for dir in [self.src_dir, self.tmp_dir, self.dest_dir]:
if not isdir(dir):
raise SIPAssemblyError("Directory {} does not exist".format(dir))
try:
self.processing_config = ArchivematicaClient(
settings.ARCHIVEMATICA['username'],
settings.ARCHIVEMATICA['api_key'],
settings.ARCHIVEMATICA['baseurl'],
settings.ARCHIVEMATICA['location_uuid']).retrieve(
'processing-configuration/{}/'.format(
settings.ARCHIVEMATICA['processing_config']))
except requests.exceptions.ConnectionError as e:
raise SIPAssemblyError("Cannot connect to Archivematica: {}".format(e))

def run(self):
self.log = logger.new(request_id=str(uuid4()))
Expand Down Expand Up @@ -60,7 +69,7 @@ def run(self):

try:
library.update_bag_info(sip)
library.add_processing_config(sip)
library.add_processing_config(sip, self.processing_config)
library.update_manifests(sip)
library.create_package(sip)
except Exception as e:
Expand Down Expand Up @@ -98,7 +107,7 @@ def start_transfer(self):
sip.save()
return "{} started.".format(sip.bag_identifier)
except Exception as e:
raise SIPAssemblyError("Error starting transfer in Archivematica: {}".format(e))
raise SIPActionError("Error starting transfer in Archivematica: {}".format(e))
else:
return "No transfers to start."

Expand All @@ -113,7 +122,7 @@ def approve_transfer(self):
sip.save()
return "{} approved.".format(sip.bag_identifier)
except Exception as e:
raise SIPAssemblyError("Error approving transfer in Archivematica: {}".format(e))
raise SIPActionError("Error approving transfer in Archivematica: {}".format(e))
else:
return "No transfers to approve."

Expand Down
3 changes: 2 additions & 1 deletion sip_assembly/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def send_start_transfer_request(self, sip):
'paths[]': base64.b64encode(bagpaths.encode())}
start = requests.post(full_url, headers=self.headers, data=params)
if start.status_code != 200:
raise ArchivematicaClientException(start.json()['message'])
message = start.json()['message'] if start.json()['message'] else start.reason
raise ArchivematicaClientException(message)

def send_approve_transfer_request(self, sip):
approve_transfer = requests.post(join(self.baseurl, 'transfer/approve_transfer/'),
Expand Down
158 changes: 50 additions & 108 deletions sip_assembly/library.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,21 @@
import subprocess
import tarfile

from fornax import settings
from .clients import ArchivematicaClient

logger = wrap_logger(logger=logging.getLogger(__name__))


def copy_to_directory(sip, dest):
    """Copies a bag to the `dest` directory

    The copy is named `<bag_identifier>.tar.gz`; the source file is left in
    place. Afterwards `sip.bag_path` points at the copy and the SIP is saved.
    Errors are not caught here, so any exception raised while copying or
    saving propagates to the caller.

    Args:
        sip: object exposing `bag_path`, `bag_identifier` and `save()`.
        dest: path of the directory that receives the copy.
    """
    # Build the target path once so the copy and the stored path cannot drift.
    target = os.path.join(dest, "{}.tar.gz".format(sip.bag_identifier))
    shutil.copyfile(sip.bag_path, target)
    sip.bag_path = target
    sip.save()


def move_to_directory(sip, dest):
    """Moves a bag to the `dest` directory

    The bag is renamed to `<bag_identifier>.tar.gz` inside `dest`. Afterwards
    `sip.bag_path` points at the new location and the SIP is saved. `dest` is
    not created here, and errors are not caught: any exception raised while
    moving or saving propagates to the caller.

    Args:
        sip: object exposing `bag_path`, `bag_identifier` and `save()`.
        dest: path of the directory that receives the bag.
    """
    # Build the target path once so the move and the stored path cannot drift.
    target = os.path.join(dest, "{}.tar.gz".format(sip.bag_identifier))
    shutil.move(sip.bag_path, target)
    sip.bag_path = target
    sip.save()


def extract_all(sip, extract_dir):
Expand All @@ -51,26 +36,19 @@ def extract_all(sip, extract_dir):
os.remove(sip.bag_path)
sip.bag_path = os.path.join(extract_dir, sip.bag_identifier)
sip.save()
return True
else:
logger.error("Unrecognized archive format: {}".format(ext), object=sip)
return False
raise Exception("Unrecognized archive format")


def move_objects_dir(sip):
    """Moves the objects directory within a bag

    Every entry directly under `<bag_path>/data` is renamed into
    `<bag_path>/data/objects`, which is created if it does not exist yet.
    Errors are not caught here; they propagate to the caller.

    Args:
        sip: object exposing `bag_path` (path of the extracted bag directory).
    """
    src = os.path.join(sip.bag_path, 'data')
    dest = os.path.join(src, 'objects')
    os.makedirs(dest, exist_ok=True)
    for fname in os.listdir(src):
        # The target directory lives inside `src`; never move it into itself.
        if fname != 'objects':
            os.rename(os.path.join(src, fname), os.path.join(dest, fname))


def validate(sip):
Expand All @@ -84,14 +62,9 @@ def create_structure(sip):
log_dir = os.path.join(sip.bag_path, 'data', 'logs')
md_dir = os.path.join(sip.bag_path, 'data', 'metadata')
docs_dir = os.path.join(sip.bag_path, 'data', 'metadata', 'submissionDocumentation')
try:
for dir in [log_dir, md_dir, docs_dir]:
if not os.path.exists(dir):
os.makedirs(dir)
return True
except Exception as e:
logger.error("Error creating new SIP structure: {}".format(e), object=sip)
return False
for dir in [log_dir, md_dir, docs_dir]:
if not os.path.exists(dir):
os.makedirs(dir)


def create_rights_csv(sip):
Expand All @@ -106,31 +79,26 @@ def create_rights_csv(sip):
if os.path.isfile(filepath):
mode = 'a'
firstrow = None
try:
if not os.path.exists(os.path.dirname(filepath)):
os.makedirs(os.path.dirname(filepath))
with open(filepath, mode) as csvfile:
csvwriter = csv.writer(csvfile)
if firstrow:
csvwriter.writerow(firstrow)
for file in os.listdir(os.path.join(sip.bag_path, 'data', 'objects')):
for rights_granted in rights_statement.get('rights_granted'):
csvwriter.writerow(
["data/objects/{}".format(file), rights_statement.get('rights_basis', ''), rights_statement.get('status', ''),
rights_statement.get('determination_date', ''), rights_statement.get('jurisdiction', ''),
rights_statement.get('start_date', ''), rights_statement.get('end_date', ''),
rights_statement.get('terms', ''), rights_statement.get('citation', ''),
rights_statement.get('note', ''), rights_granted.get('act', ''),
rights_granted.get('restriction', ''), rights_granted.get('start_date', ''),
rights_granted.get('end_date', ''), rights_granted.get('note', ''),
rights_statement.get('doc_id_type', ''), rights_statement.get('doc_id_value', ''),
rights_statement.get('doc_id_role', '')])
logger.debug("Row for Rights Statement created in rights.csv", object=rights_statement)
logger.debug("rights.csv saved", object=filepath)
except Exception as e:
logger.error("Error saving rights.csv: {}".format(e), object=sip)
return False
return True
if not os.path.exists(os.path.dirname(filepath)):
os.makedirs(os.path.dirname(filepath))
with open(filepath, mode) as csvfile:
csvwriter = csv.writer(csvfile)
if firstrow:
csvwriter.writerow(firstrow)
for file in os.listdir(os.path.join(sip.bag_path, 'data', 'objects')):
for rights_granted in rights_statement.get('rights_granted'):
csvwriter.writerow(
["data/objects/{}".format(file), rights_statement.get('rights_basis', ''), rights_statement.get('status', ''),
rights_statement.get('determination_date', ''), rights_statement.get('jurisdiction', ''),
rights_statement.get('start_date', ''), rights_statement.get('end_date', ''),
rights_statement.get('terms', ''), rights_statement.get('citation', ''),
rights_statement.get('note', ''), rights_granted.get('act', ''),
rights_granted.get('restriction', ''), rights_granted.get('start_date', ''),
rights_granted.get('end_date', ''), rights_granted.get('note', ''),
rights_statement.get('doc_id_type', ''), rights_statement.get('doc_id_value', ''),
rights_statement.get('doc_id_role', '')])
logger.debug("Row for Rights Statement created in rights.csv", object=rights_statement)
logger.debug("rights.csv saved", object=filepath)


def validate_rights_csv(sip):
Expand Down Expand Up @@ -176,9 +144,7 @@ def check_dates(r):
if problems:
for problem in problems:
logger.error(problem)
return False
else:
return True
raise Exception(problems)


# Right now this is a placeholder. There is currently no use case for adding
Expand All @@ -190,52 +156,28 @@ def create_submission_docs(sip):

def update_bag_info(sip):
    """Adds metadata to `bag-info.txt`

    Sets the bag's `Internal-Sender-Identifier` tag to
    `sip.data['identifier']` and saves the bag. Errors are not caught here;
    they propagate to the caller.

    Args:
        sip: object exposing `bag_path` and a `data` mapping with an
            `identifier` key.
    """
    bag = bagit.Bag(sip.bag_path)
    bag.info['Internal-Sender-Identifier'] = sip.data['identifier']
    bag.save()


def add_processing_config(sip, response):
    """Adds pre-defined Archivematica processing configuration file

    Writes the raw bytes of `response.content` to `processingMCP.xml` at the
    root of the bag, replacing any existing file of that name. The
    configuration is fetched by the caller and passed in, so this function
    performs no network access. Errors are not caught here; they propagate
    to the caller.

    Args:
        sip: object exposing `bag_path` (path of the bag directory).
        response: object whose `content` attribute holds the configuration
            as bytes.
    """
    with open(os.path.join(sip.bag_path, 'processingMCP.xml'), 'wb') as f:
        f.write(response.content)


def update_manifests(sip):
    """Updates bag manifests according to BagIt specification

    Opens the bag at `sip.bag_path` and saves it with `manifests=True`.
    Errors are not caught here; they propagate to the caller.

    Args:
        sip: object exposing `bag_path` (path of the bag directory).
    """
    bag = bagit.Bag(sip.bag_path)
    bag.save(manifests=True)


def create_package(sip):
    """Creates a compressed archive file from a bag

    The bag directory is written to `<bag_path>.tar.gz` with the directory's
    base name as the top-level archive entry. The directory is then removed,
    `sip.bag_path` is pointed at the archive, and the SIP is saved. Errors
    are not caught here; they propagate to the caller.

    Args:
        sip: object exposing `bag_path` (path of the bag directory) and
            `save()`.
    """
    archive_path = '{}.tar.gz'.format(sip.bag_path)
    # The context manager closes the archive; no explicit close() is needed.
    with tarfile.open(archive_path, "w:gz") as tar:
        tar.add(sip.bag_path, arcname=os.path.basename(sip.bag_path))
    shutil.rmtree(sip.bag_path)
    sip.bag_path = archive_path
    sip.save()
9 changes: 5 additions & 4 deletions sip_assembly/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,11 @@ def request_cleanup(self):
self.assertNotEqual(False, cleanup)

def run_view(self):
    """Posts to the `assemble-sip` view and checks that it returns HTTP 200.

    The request, view call and assertion all run inside the
    `process_sip.json` cassette context.
    """
    with assembly_vcr.use_cassette('process_sip.json'):
        print('*** Test run view ***')
        request = self.factory.post(reverse('assemble-sip'), {"test": True})
        response = SIPAssemblyView.as_view()(request)
        self.assertEqual(response.status_code, 200, "Wrong HTTP code")

def cleanup_view(self):
print('*** Test cleanup view ***')
Expand Down

0 comments on commit cf0f9ba

Please sign in to comment.