From 8b260471f1fb39d625adb7eeb6771465ca57c4d2 Mon Sep 17 00:00:00 2001 From: Will Usher Date: Thu, 18 Mar 2021 14:16:19 +0100 Subject: [PATCH] Batch upload folders to separate deposits - Expects a folder containing template.txt and data.csv (as per example) - In the folder, subfolders of csv files - Uploads each folder of csv files as a separate deposit - Metadata defined in data.csv per subfolder --- fill_template.py | 19 +++++++------ run.sh | 11 +++++--- upload_to_zenodo.py | 66 ++++++++++++++++++++++++++++----------------- 3 files changed, 59 insertions(+), 37 deletions(-) mode change 100644 => 100755 run.sh diff --git a/fill_template.py b/fill_template.py index 891a172..a20333f 100644 --- a/fill_template.py +++ b/fill_template.py @@ -1,6 +1,7 @@ import sys import csv import os.path +import os def fill_template(template_filename, data_filename): @@ -14,32 +15,34 @@ def fill_template(template_filename, data_filename): for row in csv_reader: output_filename = row["FILENAME"] print("Writing %s..." % output_filename) - + # Performing replacements filled_template = template for column in csv_reader.fieldnames: if column != "FILENAME": filled_template = filled_template.replace("{%s}" % column, row[column]) print((" {%s} = %s" % (column, row[column])).encode().decode('cp850')) - + # Write to output file - with open(output_filename, "w", encoding='utf-8') as output_file: + + filepath = os.path.join(os.path.dirname(data_filename), output_filename) + with open(filepath, "w", encoding='utf-8') as output_file: output_file.write(filled_template) - - + + if __name__ == "__main__": if len(sys.argv) != 3: print("Usage: fill_template.py ") exit() - + template_filename = sys.argv[1] # e.g. "template.json" if not os.path.isfile(template_filename): print("Invalid template filename.") exit() - + data_filename = sys.argv[2] # e.g. "data.csv" if not os.path.isfile(data_filename): print("Invalid data filename.") exit() - + fill_template(template_filename, data_filename) \ No newline at end of file diff --git a/run.sh b/run.sh old mode 100644 new mode 100755 index e49fe5e..828b6c5 --- a/run.sh +++ b/run.sh @@ -1,9 +1,12 @@ #!/bin/bash -python ./fill_template.py ./template.txt data.csv +DATA_FOLDER=./data + +python ./fill_template.py $DATA_FOLDER/template.txt $DATA_FOLDER/data.csv echo Copy the .pdf files to the folder of .json files. -read -rsp $'Press enter to continue...\n' +# read -rs $'Press enter to continue...\n' -TOKEN=INSERT_YOUR_ZENODO_TOKEN_HERE -python ./upload_to_zenodo.py $TOKEN . \ No newline at end of file +TOKEN=`cat .token2` +export TOKEN +python ./upload_to_zenodo.py $TOKEN ./$DATA_FOLDER diff --git a/upload_to_zenodo.py b/upload_to_zenodo.py index f09c627..28e805e 100644 --- a/upload_to_zenodo.py +++ b/upload_to_zenodo.py @@ -5,56 +5,72 @@ import codecs BASE_URL = "https://sandbox.zenodo.org" # TODO: once you are sure about what you are doing, remove the "sandbox." part -TOKEN = "" +TOKEN = os.getenv('TOKEN') +print(TOKEN) -def upload(metadata, pdf_path): +def upload(metadata, directory): if not _is_valid_json(metadata): return - # Create new paper submission - url = "{base_url}/api/deposit/depositions/?access_token={token}".format(base_url=BASE_URL, token=TOKEN) + # Create new submission + url = "{base_url}/api/deposit/depositions".format(base_url=BASE_URL) headers = {"Content-Type": "application/json"} - response = requests.post(url, data=metadata, headers=headers) + params = {'access_token': TOKEN} + response = requests.post(url, params=params, json={}, headers=headers) #print(response.text) if response.status_code > 210: print("Error happened during submission, status code: " + str(response.status_code)) return # Get the submission ID - submission_id = json.loads(response.text)["id"] + submission_id = response.json()["id"] + bucket_url = response.json()["links"]["bucket"] + + # Add metadata + response = requests.put('{base_url}/api/deposit/depositions/{id}'.format(base_url=BASE_URL, id=submission_id), + params=params, + data=metadata, + headers=headers) + if response.status_code > 210: + print("Error happened during metadata upload, status code: " + str(response.status_code)) + print(response.json()) + return + + for csv_file in os.listdir(directory): + filepath = os.path.join(directory, csv_file) + with open(filepath, "rb") as fp: + response = requests.put( + "%s/%s" % (bucket_url, csv_file), + data=fp, + params=params, + ) # Upload the file - url = "{base_url}/api/deposit/depositions/{id}/files?access_token={token}".format(base_url=BASE_URL, id=str(submission_id), token=TOKEN) - upload_metadata = {'filename': 'paper.pdf'} - files = {'file': open(pdf_path, 'rb')} - response = requests.post(url, data=upload_metadata, files=files) - #print(response.text) if response.status_code > 210: print("Error happened during file upload, status code: " + str(response.status_code)) return - - print("{file} submitted with submission ID = {id} (DOI: 10.5281/zenodo.{id})".format(file=pdf_path,id=submission_id)) + + print("{file} submitted with submission ID = {id} (DOI: 10.5281/zenodo.{id})".format(file=directory,id=submission_id)) # The submission needs an additional "Publish" step. This can also be done from a script, but to be on the safe side, it is not included. (The attached file cannot be changed after publication.) - - + + def batch_upload(directory): for metadata_file in os.listdir(directory): metadata_file = os.path.join(directory, metadata_file) if metadata_file.endswith(".json"): - pdf_file = metadata_file.replace(".json",".pdf") - if os.path.isfile(pdf_file): - print("Uploading %s & %s" % (metadata_file, pdf_file)) + filename = metadata_file.replace(".json","") + if os.path.isdir(filename): + print("Uploading %s & %s" % (metadata_file, filename)) with codecs.open(metadata_file, 'r', 'utf-8') as f: metadata = f.read() # Re-encoding in order to support UTF-8 inputs metadata_json = json.loads(metadata) metadata = json.dumps(metadata_json, ensure_ascii=True) - #print(metadata) - upload(metadata, pdf_file) + upload(metadata, filename) else: - print("The file %s might be a submission metadata file, but %s does not exist." % (metadata_file, pdf_file)) - - + print("The file %s might be a submission metadata file, but %s does not exist." % (metadata_file, filename)) + + def _is_valid_json(text): try: json.loads(text) @@ -69,11 +85,11 @@ def _is_valid_json(text): print("Usage: upload_to_zenodo.py ") print(" The directory contains .json metadata descriptors and .pdf files.") exit() - + TOKEN = sys.argv[1] directory = sys.argv[2] if not os.path.isdir(directory): print("Invalid directory.") exit() - + batch_upload(directory) \ No newline at end of file