forked from smith-special-collections/sc-workflow-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
archived_websites_add_dos.py
125 lines (92 loc) · 5.06 KB
/
archived_websites_add_dos.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import requests
import json
import csv
import os
import getpass
import uuid
#This script has been modified from the duke_create_do_from_ao_uri.py script written by Noah Huffman at Duke. It has been updated to work in python3 and has had the method of publishing the digital object altered.
# Starting with an input CSV, this script will use the ArchivesSpace API to batch create digital object records and link them as instances of specified archival objects.
# This script assumes you have the Archival Object URIs already.
# The script will write out a CSV containing the same information as the starting CSV plus
# the refIDs and URIs for the archival objects and the the URIs for the created digital objects.
# The 5 column input csv should include a header row and be formatted with the columns identified on line 66:
# Input CSV can be modified to supply additional input metadata for forming the digital objects
#AUTHENTICATION STUFF:
secretsVersion = input('To edit production server, enter the name of the \
secrets file: ')
if secretsVersion != '':
try:
secrets = __import__(secretsVersion)
print('Editing Production')
except ImportError:
secrets = __import__('secrets')
print('Editing Development')
else:
print('Editing Development')
baseURL = secrets.baseURL
user = secrets.user
password = secrets.password
repository = secrets.repository
auth = requests.post(baseURL + '/users/' + user + '/login?password='
+ password).json()
session = auth['session']
headers = {'X-ArchivesSpace-Session': session,
'Content_Type': 'application/json'}
#FILE INPUT / OUTPUT STUFF:
#prompt for input file path
archival_object_csv = input("Path to input CSV: ")
#prompt for output path
updated_archival_object_csv = input("Path to output CSV: ")
#Open Input CSV and iterate over rows
with open(archival_object_csv,'r') as csvfile, open(updated_archival_object_csv,'w') as csvout:
csvin = csv.reader(csvfile)
next(csvin, None) #ignore header row
csvout = csv.writer(csvout)
for row in csvin:
#INPUT CSV STUFF. This assumes you have URIs for the already created archival objects. The URI should be formatted like: /repositories/2/archival_objects/407720
ao_uri = row[0]
new_do_url = row[1]
new_do_caption = row[2]
new_do_id = row[3]
new_do_title = row[4]
publish_true_false = row[5]
repo_id = row[6]
print ('Found AO: ' + ao_uri)
# Submit a get request for the archival object and store the JSON
archival_object_json = requests.get(baseURL+ao_uri,headers=headers).json()
# Add the archival object uri to the row from the csv to write it out at the end
row.append(ao_uri)
#Publish the digital object or not.
if publish_true_false.lower() == 'false':
publish = False
else:
publish = True
# If you want to reuse the display string from the archival object as the digital object title, uncomment line 87 and replace
# 'title':new_do_title in line 91 with 'title':display_string
# Note: this also does not copy over any notes from the archival object
display_string = archival_object_json['display_string']
# Form the digital object JSON using the display string from the archival object and the identifier and the file_uri from the csv
new_digital_object_json = {'title':display_string,'digital_object_id':new_do_id,'publish':publish,'file_versions':[{'file_uri':new_do_url,'xlink_actuate_attribute':'onRequest','xlink_show_attribute':'new','caption':new_do_caption,'publish':publish}]}
dig_obj_data = json.dumps(new_digital_object_json)
# Post the digital object
dig_obj_post = requests.post(baseURL+'/repositories/'+repo_id+'/digital_objects',headers=headers,data=dig_obj_data).json()
print ('New DO: ', dig_obj_post)
# Grab the digital object uri
dig_obj_uri = dig_obj_post['uri']
print ('New DO URI: ' + dig_obj_uri)
# Add the digital object uri to the row from the csv to write it out at the end
row.append(dig_obj_uri)
# Build a new instance to add to the archival object, linking to the digital object
dig_obj_instance = {'instance_type':'digital_object', 'digital_object':{'ref':dig_obj_uri}}
# Append the new instance to the existing archival object record's instances
archival_object_json['instances'].append(dig_obj_instance)
archival_object_data = json.dumps(archival_object_json)
# Repost the archival object
archival_object_update = requests.post(baseURL+ao_uri,headers=headers,data=archival_object_data).json()
print ('New DO added as instance of new AO: ', archival_object_update['status'])
# Write a new csv with all the info from the initial csv + the ArchivesSpace uris for the archival and digital objects
with open(updated_archival_object_csv,'a') as csvout:
writer = csv.writer(csvout)
writer.writerow(row)
#print a new line for readability in console
print ('\n')