Batch upload folders to separate deposits

- Expects a folder containing template.txt and data.csv (as per the example).
- Inside that folder, each subfolder holds a set of CSV files.
- Each subfolder of CSV files is uploaded as a separate deposit.
- The metadata for each subfolder is defined in data.csv.
ClimateCompatibleGrowth · Mar 18, 2021 · 8b26047 · 8b26047
1 parent 5ccdbb5
commit 8b26047
Show file tree

Hide file tree

Showing 3 changed files with 59 additions and 37 deletions.
diff --git a/fill_template.py b/fill_template.py
@@ -1,6 +1,7 @@
 import sys
 import csv
 import os.path
+import os
 
 
 def fill_template(template_filename, data_filename):
@@ -14,32 +15,34 @@ def fill_template(template_filename, data_filename):
         for row in csv_reader:
             output_filename = row["FILENAME"]
             print("Writing %s..." % output_filename)
-            
+
             # Performing replacements
             filled_template = template
             for column in csv_reader.fieldnames:
                 if column != "FILENAME":
                     filled_template = filled_template.replace("{%s}" % column, row[column])
                     print(("    {%s} = %s" % (column, row[column])).encode().decode('cp850'))
-                    
+
             # Write to output file
-            with open(output_filename, "w", encoding='utf-8') as output_file:
+
+            filepath = os.path.join(os.path.dirname(data_filename), output_filename)
+            with open(filepath, "w", encoding='utf-8') as output_file:
                 output_file.write(filled_template)
-                
-                
+
+
 if __name__ == "__main__":
     if len(sys.argv) != 3:
         print("Usage: fill_template.py <template_filename> <data_filename>")
         exit()
-    
+
     template_filename = sys.argv[1] # e.g. "template.json"
     if not os.path.isfile(template_filename):
         print("Invalid template filename.")
         exit()
-    
+
     data_filename = sys.argv[2] # e.g. "data.csv"
     if not os.path.isfile(data_filename):
         print("Invalid data filename.")
         exit()
-    
+
     fill_template(template_filename, data_filename)
diff --git a/run.sh b/run.sh
@@ -1,9 +1,12 @@
 #!/bin/bash
 
-python ./fill_template.py ./template.txt data.csv
+DATA_FOLDER=./data
+
+python ./fill_template.py $DATA_FOLDER/template.txt $DATA_FOLDER/data.csv
 
 echo Copy the .pdf files to the folder of .json files.
-read -rsp $'Press enter to continue...\n'
+# read -rs $'Press enter to continue...\n'
 
-TOKEN=INSERT_YOUR_ZENODO_TOKEN_HERE
-python ./upload_to_zenodo.py $TOKEN .
+TOKEN=`cat .token2`
+export TOKEN
+python ./upload_to_zenodo.py $TOKEN ./$DATA_FOLDER
diff --git a/upload_to_zenodo.py b/upload_to_zenodo.py
@@ -5,56 +5,72 @@
 import codecs
 
 BASE_URL = "https://sandbox.zenodo.org" # TODO: once you are sure about what you are doing, remove the "sandbox." part
-TOKEN = ""
+TOKEN = os.getenv('TOKEN')
+print(TOKEN)
 
-def upload(metadata, pdf_path):
+def upload(metadata, directory):
     if not _is_valid_json(metadata):
         return
 
-    # Create new paper submission
-    url = "{base_url}/api/deposit/depositions/?access_token={token}".format(base_url=BASE_URL, token=TOKEN)
+    # Create new submission
+    url = "{base_url}/api/deposit/depositions".format(base_url=BASE_URL)
     headers = {"Content-Type": "application/json"}
-    response = requests.post(url, data=metadata, headers=headers)
+    params = {'access_token': TOKEN}
+    response = requests.post(url, params=params, json={}, headers=headers)
     #print(response.text)
     if response.status_code > 210:
         print("Error happened during submission, status code: " + str(response.status_code))
         return
 
     # Get the submission ID
-    submission_id = json.loads(response.text)["id"]
+    submission_id = response.json()["id"]
+    bucket_url = response.json()["links"]["bucket"]
+
+    # Add metadata
+    response = requests.put('{base_url}/api/deposit/depositions/{id}'.format(base_url=BASE_URL, id=submission_id),
+                            params=params,
+                            data=metadata,
+                            headers=headers)
+    if response.status_code > 210:
+        print("Error happened during metadata upload, status code: " + str(response.status_code))
+        print(response.json())
+        return
+
+    for csv_file in os.listdir(directory):
+        filepath = os.path.join(directory, csv_file)
+        with open(filepath, "rb") as fp:
+            response = requests.put(
+                                    "%s/%s" % (bucket_url, csv_file),
+                                    data=fp,
+                                    params=params,
+                                )
 
     # Upload the file
-    url = "{base_url}/api/deposit/depositions/{id}/files?access_token={token}".format(base_url=BASE_URL, id=str(submission_id), token=TOKEN)
-    upload_metadata = {'filename': 'paper.pdf'}
-    files = {'file': open(pdf_path, 'rb')}
-    response = requests.post(url, data=upload_metadata, files=files)
-    #print(response.text)
     if response.status_code > 210:
         print("Error happened during file upload, status code: " + str(response.status_code))
         return
-    
-    print("{file} submitted with submission ID = {id} (DOI: 10.5281/zenodo.{id})".format(file=pdf_path,id=submission_id))    
+
+    print("{file} submitted with submission ID = {id} (DOI: 10.5281/zenodo.{id})".format(file=directory,id=submission_id))
     # The submission needs an additional "Publish" step. This can also be done from a script, but to be on the safe side, it is not included. (The attached file cannot be changed after publication.)
-    
-    
+
+
 def batch_upload(directory):
     for metadata_file in os.listdir(directory):
         metadata_file = os.path.join(directory, metadata_file)
         if metadata_file.endswith(".json"):
-            pdf_file = metadata_file.replace(".json",".pdf")
-            if os.path.isfile(pdf_file):
-                print("Uploading %s & %s" % (metadata_file, pdf_file))
+            filename = metadata_file.replace(".json","")
+            if os.path.isdir(filename):
+                print("Uploading %s & %s" % (metadata_file, filename))
                 with codecs.open(metadata_file, 'r', 'utf-8') as f:
                     metadata = f.read()
                     # Re-encoding in order to support UTF-8 inputs
                     metadata_json = json.loads(metadata)
                     metadata = json.dumps(metadata_json, ensure_ascii=True)
-                    #print(metadata)
-                upload(metadata, pdf_file)
+                upload(metadata, filename)
             else:
-                print("The file %s might be a submission metadata file, but %s does not exist." % (metadata_file, pdf_file))
-           
-           
+                print("The file %s might be a submission metadata file, but %s does not exist." % (metadata_file, filename))
+
+
 def _is_valid_json(text):
     try:
         json.loads(text)
@@ -69,11 +85,11 @@ def _is_valid_json(text):
         print("Usage: upload_to_zenodo.py <token> <directory>")
         print("  The directory contains .json metadata descriptors and .pdf files.")
         exit()
-    
+
     TOKEN = sys.argv[1]
     directory = sys.argv[2]
     if not os.path.isdir(directory):
         print("Invalid directory.")
         exit()
-   
+
     batch_upload(directory)