-
Notifications
You must be signed in to change notification settings - Fork 0
/
buildSelections.py
111 lines (92 loc) · 4.97 KB
/
buildSelections.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import json
import requests
from asnake.client import ASnakeClient
def _normalized_sort_date(archival_object):
    """Return the sortable date string for an ArchivesSpace archival object.

    Only the first entry of ``dates`` is considered. The result is ``begin``,
    or ``begin/end`` when an ``end`` value is present. A date whose expression
    contains "undated" yields the placeholder "9999".

    Records with no dates at all, or with no ``begin`` value (an ArchivesSpace
    date may carry only an expression), also yield "9999" instead of raising,
    so one incomplete record does not abort the whole export.
    """
    dates = archival_object.get("dates") or [{}]
    first_date = dates[0]
    begin = first_date.get("begin")
    expression = first_date.get("expression") or ""
    if begin is None or "undated" in expression.lower():
        return "9999"
    if "end" in first_date:
        return begin + "/" + first_date["end"]
    return begin


def _leading_year(display_date):
    """Return the first year-like token of a display date, as a string.

    "ca. 1950" -> "1950", "1950-1960" -> "1950", "1950 May 4" -> "1950".
    Raises IndexError for a "ca." date that has no space-separated second token.
    """
    if display_date.lower().startswith("ca."):
        return display_date.split(" ")[1]
    if "-" in display_date:
        return display_date.split("-")[0]
    return display_date.split(" ")[0]


def _passes_date_filter(display_date, date):
    """Return True when *display_date* satisfies the *date* filter.

    *date* is either a single year ("1970": keep items strictly earlier) or an
    inclusive range ("1950-1960"). Undated items never pass. Dates that cannot
    be read as a year are reported with a "Date Error" line and do not pass.
    """
    if display_date.lower() == "undated":
        return False
    try:
        year_text = _leading_year(display_date)
    except IndexError:
        # e.g. "ca.1950" with no space after the abbreviation
        print("Date Error: " + display_date)
        return False
    print(year_text)
    try:
        year = int(year_text)
        if "-" in date:
            bounds = date.split("-")
            return int(bounds[0]) <= year <= int(bounds[1])
        return year < int(date)
    except ValueError:
        # Only non-numeric years/bounds are expected here; anything else
        # (including KeyboardInterrupt) should propagate.
        print("Date Error: " + year_text)
        return False


def buildSelections(colID, refID=None, filter=None, date=False, verbose=False):
    """Export a JSON list of digital objects for a collection or component.

    Queries the archives.albany.edu catalog (100 results per page) for every
    item in the collection *colID*, or under the component *refID* when given,
    looks up each item's ArchivesSpace archival object to obtain a normalized
    date, and writes the items sorted by that date and then by the first word
    of the title to ``<refID>.json`` or ``<colID with "." replaced by "-">.json``
    in the SPE_Uploads directory.

    Args:
        colID: Collection number used in the ``collection_number_sim`` filter.
        refID: Optional ref_id; when set, items are selected by
            ``record_parent_sim`` instead of by collection.
        filter: Optional extra query-string fragment appended to the catalog
            URL (for example ``f[resource_type_sim][]=Periodical``).
        date: Optional year filter as a string: ``"YYYY"`` keeps items dated
            strictly before that year, ``"YYYY-YYYY"`` keeps items within the
            inclusive range. Undated items are dropped when a filter is given.
        verbose: Accepted for compatibility; currently unused (progress is
            always printed).

    Returns:
        None. The result is written to disk.
    """
    client = ASnakeClient()
    client.authorize()

    if os.name == "nt":
        outDir = "\\\\Lincoln\\Library\\SPE_Uploads"
    else:
        outDir = "/media/Library/SPE_Uploads"

    if refID:
        url = "https://archives.albany.edu/catalog?f[record_parent_sim][]=" + refID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, refID + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-") + "aspace_" + refID
    else:
        url = "https://archives.albany.edu/catalog?f[collection_number_sim][]=" + colID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, colID.replace(".", "-") + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-")
    if filter:
        url = url + "&" + filter

    # NOTE(review): verify=False disables TLS certificate verification for
    # every request below. Left unchanged; confirm this is still required.
    # The description record is only requested and its status printed; it is
    # not written to disk.
    print(descriptionURL + "?format=json")
    r = requests.get(descriptionURL + "?format=json", verify=False)
    print(r.status_code)

    collection = []
    page = 1
    while True:
        pageResponse = requests.get(url + "&page=" + str(page), verify=False)
        print(pageResponse.status_code)
        payload = pageResponse.json()["response"]  # parse each page once

        for item in payload["docs"]:
            obj = {}
            obj["title"] = item["title_tesim"][0]
            obj["date"] = item["date_created_tesim"][0]
            ref_id = item["archivesspace_record_tesim"][0]
            obj["thumb"] = "https://archives.albany.edu" + item["thumbnail_path_ss"]
            obj["url"] = "https://archives.albany.edu/concern/" + item["has_model_ssim"][0].lower() + "s/" + item["id"]

            # Resolve the ref_id to the full archival object to read its dates.
            record = client.get("repositories/2/find_by_id/archival_objects?ref_id[]=" + ref_id).json()
            ao = client.get(record["archival_objects"][0]["ref"]).json()
            print(ao["ref_id"])
            obj["date_normal"] = _normalized_sort_date(ao)

            if date:
                if _passes_date_filter(obj["date"], date):
                    collection.append(obj)
            else:
                # No date filter: keep every item.
                collection.append(obj)

        # Continue only while the catalog explicitly reports more pages.
        if payload["pages"]["last_page?"] is not False:
            break
        page += 1

    # Order by normalized date, breaking ties on the first word of the title.
    sortedCollection = sorted(
        collection,
        key=lambda entry: (
            entry["date_normal"].split(" ")[0],
            entry["title"].split(" ")[0],
        ),
    )
    print(len(sortedCollection))

    with open(outFile, 'w', encoding='utf-8', newline='') as f:
        json.dump(sortedCollection, f, ensure_ascii=True, indent=4)
# Command-line entry point: parse arguments and run buildSelections.
if __name__ == '__main__':
    import argparse

    argParse = argparse.ArgumentParser()
    # colID is used as the catalog's collection_number_sim filter value.
    argParse.add_argument("colID", help="Collection number to build selections for.")
    argParse.add_argument("-id", help="Optional ref_id for components below the collection level.", default=None)
    argParse.add_argument("-f", "--filter", help="Hyrax filter to limit results, such as \"f[resource_type_sim][]=Periodical\"", default=None)
    # A single year keeps earlier items; START-END keeps an inclusive range.
    argParse.add_argument("-d", "--date", help="Only return items created before this year, or within an inclusive range given as START-END (such as 1950-1960).", default=None)
    args = argParse.parse_args()

    # There is no command-line switch for verbose; it is always passed as True.
    buildSelections(args.colID, refID=args.id, filter=args.filter, date=args.date, verbose=True)