Skip to content

Commit

Permalink
Batch upload folders to separate deposits
Browse files Browse the repository at this point in the history
- Expects a folder containing template.txt and data.csv (as in the example)
- The folder also contains subfolders of CSV files
- Each subfolder of CSV files is uploaded as a separate deposit
- The metadata for each subfolder is defined in data.csv
  • Loading branch information
willu47 committed Mar 18, 2021
1 parent 5ccdbb5 commit 8b26047
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 37 deletions.
19 changes: 11 additions & 8 deletions fill_template.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import sys
import csv
import os.path
import os


def fill_template(template_filename, data_filename):
Expand All @@ -14,32 +15,34 @@ def fill_template(template_filename, data_filename):
for row in csv_reader:
output_filename = row["FILENAME"]
print("Writing %s..." % output_filename)

# Performing replacements
filled_template = template
for column in csv_reader.fieldnames:
if column != "FILENAME":
filled_template = filled_template.replace("{%s}" % column, row[column])
print((" {%s} = %s" % (column, row[column])).encode().decode('cp850'))

# Write to output file
with open(output_filename, "w", encoding='utf-8') as output_file:

filepath = os.path.join(os.path.dirname(data_filename), output_filename)
with open(filepath, "w", encoding='utf-8') as output_file:
output_file.write(filled_template)


if __name__ == "__main__":
    # Command-line entry point: validate both arguments, then fill the template.
    # sys.exit(message) writes the message to stderr and exits with status 1,
    # so a calling script can tell that nothing was generated. The bare exit()
    # helper used previously comes from the site module and exited with 0.
    if len(sys.argv) != 3:
        sys.exit("Usage: fill_template.py <template_filename> <data_filename>")

    template_filename = sys.argv[1]  # e.g. "template.json"
    if not os.path.isfile(template_filename):
        sys.exit("Invalid template filename.")

    data_filename = sys.argv[2]  # e.g. "data.csv"
    if not os.path.isfile(data_filename):
        sys.exit("Invalid data filename.")

    fill_template(template_filename, data_filename)
11 changes: 7 additions & 4 deletions run.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#!/bin/bash
# Fill the metadata template for every row of data.csv, then upload the
# contents of the data folder to Zenodo.

# Stop at the first failing command or unset variable, so that nothing is
# uploaded when template filling fails or the token file is missing.
set -eu

DATA_FOLDER=./data

python ./fill_template.py "$DATA_FOLDER/template.txt" "$DATA_FOLDER/data.csv"

echo Copy the .pdf files to the folder of .json files.
# read -rs $'Press enter to continue...\n'

# The access token is kept out of the script: it is read from a local file
# and exported so that upload_to_zenodo.py can also pick it up from the
# environment.
TOKEN=$(cat .token2)
export TOKEN
python ./upload_to_zenodo.py "$TOKEN" "$DATA_FOLDER"
66 changes: 41 additions & 25 deletions upload_to_zenodo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,72 @@
import codecs

BASE_URL = "https://sandbox.zenodo.org"  # TODO: once you are sure about what you are doing, remove the "sandbox." part
# Access token, read from the TOKEN environment variable; falls back to an
# empty string when the variable is unset so that TOKEN is always a str.
# The token is a secret, so it is deliberately not printed.
TOKEN = os.getenv('TOKEN', "")

def upload(metadata, directory):
    """Create a Zenodo deposition and upload the files in ``directory`` to it.

    An empty deposition is created first, ``metadata`` is then attached with
    a PUT request, and finally every regular file found directly inside
    ``directory`` is sent to the deposition's bucket URL. Progress and errors
    are reported with ``print``; the function returns at the first request
    that fails.

    The submission still needs an additional "Publish" step. This can also be
    done from a script, but to be on the safe side it is not included here
    (the attached files cannot be changed after publication).

    Args:
        metadata: JSON text describing the deposition; sent as the body of
            the metadata request.
        directory: Path of the folder whose files are uploaded.

    Returns:
        None.
    """
    if not _is_valid_json(metadata):
        return

    headers = {"Content-Type": "application/json"}
    params = {'access_token': TOKEN}

    # Create new submission
    url = "{base_url}/api/deposit/depositions".format(base_url=BASE_URL)
    response = requests.post(url, params=params, json={}, headers=headers)
    if response.status_code > 210:
        print("Error happened during submission, status code: " + str(response.status_code))
        return

    # Get the submission ID and the bucket URL that files are uploaded to
    deposition = response.json()
    submission_id = deposition["id"]
    bucket_url = deposition["links"]["bucket"]

    # Add metadata
    response = requests.put('{base_url}/api/deposit/depositions/{id}'.format(base_url=BASE_URL, id=submission_id),
                            params=params,
                            data=metadata,
                            headers=headers)
    if response.status_code > 210:
        print("Error happened during metadata upload, status code: " + str(response.status_code))
        print(response.json())
        return

    # Upload the files
    for csv_file in os.listdir(directory):
        filepath = os.path.join(directory, csv_file)
        if not os.path.isfile(filepath):
            # Only regular files can be streamed; open() fails on a sub-folder.
            continue
        with open(filepath, "rb") as fp:
            response = requests.put(
                "%s/%s" % (bucket_url, csv_file),
                data=fp,
                params=params,
            )
        # Check every upload, not just the last one, so that a failure in the
        # middle of the folder is not reported as a successful submission.
        if response.status_code > 210:
            print("Error happened during file upload, status code: " + str(response.status_code))
            return

    print("{file} submitted with submission ID = {id} (DOI: 10.5281/zenodo.{id})".format(file=directory, id=submission_id))


def batch_upload(directory):
    """Upload every metadata file in ``directory`` together with its data folder.

    For each ``<name>.json`` file directly inside ``directory`` a folder
    called ``<name>`` is expected next to it. When the folder exists, the
    metadata is re-encoded as ASCII-only JSON and passed to ``upload`` with
    the folder path. When it does not, or when the metadata is not valid
    JSON, a message is printed and the entry is skipped.

    Args:
        directory: Path of the folder holding the ``.json`` metadata files
            and the matching data folders.

    Returns:
        None.
    """
    suffix = ".json"
    for entry in os.listdir(directory):
        metadata_file = os.path.join(directory, entry)
        if not metadata_file.endswith(suffix):
            continue

        # Strip only the trailing extension; str.replace would also remove
        # ".json" from anywhere else in the path (e.g. a parent folder name).
        filename = metadata_file[:-len(suffix)]
        if not os.path.isdir(filename):
            print("The file %s might be a submission metadata file, but %s does not exist." % (metadata_file, filename))
            continue

        print("Uploading %s & %s" % (metadata_file, filename))
        with open(metadata_file, 'r', encoding='utf-8') as f:
            raw_metadata = f.read()

        # Re-encoding in order to support UTF-8 inputs
        try:
            metadata = json.dumps(json.loads(raw_metadata), ensure_ascii=True)
        except ValueError as err:
            # One malformed metadata file should not abort the whole batch.
            print("Skipping %s: invalid JSON (%s)" % (metadata_file, err))
            continue

        upload(metadata, filename)


def _is_valid_json(text):
try:
json.loads(text)
Expand All @@ -69,11 +85,11 @@ def _is_valid_json(text):
print("Usage: upload_to_zenodo.py <token> <directory>")
print(" The directory contains .json metadata descriptors and .pdf files.")
exit()

TOKEN = sys.argv[1]
directory = sys.argv[2]
if not os.path.isdir(directory):
print("Invalid directory.")
exit()

batch_upload(directory)

0 comments on commit 8b26047

Please sign in to comment.