From 74a82fc0e163717f301b3c551b558decd8dbd200 Mon Sep 17 00:00:00 2001 From: zoidy Date: Sat, 28 Oct 2023 03:44:36 +0000 Subject: [PATCH 1/3] Add CC0 license to collections, improve error handling, avoid inconsistent data when collection package exists --- figshare/Collection.py | 39 +++++++++++++++++++++++++-------------- figshare/Integration.py | 7 ++++++- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/figshare/Collection.py b/figshare/Collection.py index fbd3b41..14b0e08 100644 --- a/figshare/Collection.py +++ b/figshare/Collection.py @@ -1,4 +1,5 @@ import json +import shutil import os import requests import hashlib @@ -251,6 +252,10 @@ def process_collections(self, collections): author_name = re.sub("[^A-Za-z0-9]", "_", version['authors'][0]['full_name']) folder_name = str(collection) + "_" + version_no + "_" + author_name + "_" + version_md5 + "/" + version_no + "/METADATA" version["articles"] = articles + + # Collections don't have an explicit license. Make them CC0 + version["license"] = json.loads('{"value": 2,"name": "CC0","url": "https://creativecommons.org/publicdomain/zero/1.0/"}') + self.logs.write_log_in_file("info", f"------- Processing collection {collection} version {version['version']}.", True) self.__save_json_in_metadata(collection, version, folder_name) collection_preservation_path = self.preservation_storage_location + os.path.basename(os.path.dirname(os.path.dirname(folder_name))) @@ -272,20 +277,17 @@ def __save_json_in_metadata(self, collection_id, version_data, folder_name): self.article_obj.check_access_of_directories(preservation_storage_location, "preservation") complete_path = preservation_storage_location + folder_name - check_path_exists = os.path.exists(complete_path) - if (check_path_exists is False): - os.makedirs(complete_path, exist_ok=True) - json_data = json.dumps(version_data, indent=4) - filename_path = complete_path + "/" + str(collection_id) + ".json" - # Writing to json file - with open(filename_path, "w") as outfile: - outfile.write(json_data) - self.logs.write_log_in_file("info", "Saved collection data in json.", True) - else: - storage_collection_version_dir = os.listdir(complete_path) - file_name = f"{str(collection_id)}.json" - if (len(storage_collection_version_dir) == 0 or file_name not in storage_collection_version_dir): - self.logs.write_log_in_file("warning", f"{complete_path} path already exists but missing {file_name} file.") + if (os.path.exists(complete_path)): + self.delete_folder(complete_path) + + os.makedirs(complete_path, exist_ok=True) + json_data = json.dumps(version_data, indent=4) + filename_path = complete_path + "/" + str(collection_id) + ".json" + # Writing to json file + with open(filename_path, "w") as outfile: + outfile.write(json_data) + self.logs.write_log_in_file("info", "Saved collection data in json.", True) + def get_collection_api_url(self): collections_api_url = self.api_endpoint + '/collections' @@ -313,3 +315,12 @@ def fetch_by_collection_id(self): retries = self.article_obj.retries_if_error(e, 500, retries) if (retries > self.retries): break + + """ + Delete folder + """ + def delete_folder(self, folder_path): + check_exists = os.path.exists(folder_path) + if (check_exists is True): + shutil.rmtree(folder_path) + self.logs.write_log_in_file("info", f"Deleted {folder_path}", True) \ No newline at end of file diff --git a/figshare/Integration.py b/figshare/Integration.py index 6d2f7ca..406695a 100644 --- a/figshare/Integration.py +++ b/figshare/Integration.py @@ -101,7 +101,12 @@ def post_process_script_function(self, *args): bagger.run_dart(Path(args.path, _path)) else: self._rebachlogger.write_log_in_file("info", f"Processing preservation package '{preservation_package_name}' ", True) - status = bagger.run_dart(args.path) + try: + status = bagger.run_dart(args.path) + except Exception as e: + status = Status(1) + self._rebachlogger.write_log_in_file("error", f"bagger: {e.__class__.__name__}: {str(e)}.", True) + self._rebachlogger.write_log_in_file("info", f"Status: {status.name}.", True) self._rebachlogger.write_log_in_file("info", f"Exit code: {status}.", True) if (status == 0): From f26957dfae4dc787a2fb393fc5b4102c6d8f0cae Mon Sep 17 00:00:00 2001 From: zoidy Date: Sat, 28 Oct 2023 03:50:02 +0000 Subject: [PATCH 2/3] lint --- figshare/Collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/figshare/Collection.py b/figshare/Collection.py index 14b0e08..bb1aa4a 100644 --- a/figshare/Collection.py +++ b/figshare/Collection.py @@ -323,4 +323,4 @@ def delete_folder(self, folder_path): check_exists = os.path.exists(folder_path) if (check_exists is True): shutil.rmtree(folder_path) - self.logs.write_log_in_file("info", f"Deleted {folder_path}", True) \ No newline at end of file + self.logs.write_log_in_file("info", f"Deleted {folder_path}", True) From 7872ab605ca24e2029078f584ebefcaa123c51c0 Mon Sep 17 00:00:00 2001 From: zoidy Date: Sat, 28 Oct 2023 03:51:08 +0000 Subject: [PATCH 3/3] lint --- figshare/Collection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/figshare/Collection.py b/figshare/Collection.py index bb1aa4a..b1433e6 100644 --- a/figshare/Collection.py +++ b/figshare/Collection.py @@ -288,7 +288,6 @@ def __save_json_in_metadata(self, collection_id, version_data, folder_name): outfile.write(json_data) self.logs.write_log_in_file("info", "Saved collection data in json.", True) - def get_collection_api_url(self): collections_api_url = self.api_endpoint + '/collections' if self.api_endpoint[-1] == "/":