From f5441d89a77b92d779fd78e55ed58e8886be6545 Mon Sep 17 00:00:00 2001
From: mikedarcy
Date: Fri, 7 Apr 2023 14:11:42 -0700
Subject: [PATCH] Bulk upload bugfixes and enhancements. Add option to launch
 a web browser when using `deriva-globus-auth-utils logout` to complete the
 Globus logout flow at https://app.globus.org/logout.

---
 deriva/core/utils/globus_auth_utils.py        | 20 +++++++++--
 deriva/transfer/upload/deriva_upload.py       | 35 +++++++++++++------
 deriva/transfer/upload/processors/__init__.py |  4 ++-
 .../processors/metadata_update_processor.py   | 30 ++++++++++++++++
 4 files changed, 74 insertions(+), 15 deletions(-)
 create mode 100644 deriva/transfer/upload/processors/metadata_update_processor.py

diff --git a/deriva/core/utils/globus_auth_utils.py b/deriva/core/utils/globus_auth_utils.py
index 5c4d61d6..ed85476b 100644
--- a/deriva/core/utils/globus_auth_utils.py
+++ b/deriva/core/utils/globus_auth_utils.py
@@ -8,6 +8,7 @@
 import importlib
 import datetime
 import tzlocal
+import webbrowser
 from pprint import pprint
 from requests.exceptions import HTTPError, ConnectionError
 from bdbag.fetch.auth import keychain as bdbkc
@@ -24,7 +25,7 @@
 PUBLIC_GROUPS_API_URL = 'https://groups.api.globus.org/v2/groups/my_groups'
 CLIENT_CRED_FILE = '/home/secrets/oauth2/client_secret_globus.json'
 DEFAULT_SCOPES = ["openid", GROUPS_SCOPE_NAME]
-
+LOGOUT_URL = "https://app.globus.org/logout"
 
 class UsageException(ValueError):
     """Usage exception.
@@ -710,7 +711,8 @@ def login(self,
                self.update_bdbag_keychain(token=access_token, host=host, keychain_file=bdbag_keychain_file)
         return tokens
 
-    def logout(self, hosts=(), requested_scopes=(), exclude_defaults=False, bdbag_keychain_file=None):
+    def logout(self, hosts=(), requested_scopes=(), exclude_defaults=False, bdbag_keychain_file=None,
+               include_browser_logout=False):
         tokens = self.client._load_raw_tokens()
         scopes = set(requested_scopes)
 
@@ -738,6 +740,14 @@ def logout(self, hosts=(), requested_scopes=(), exclude_defaults=False, bdbag_ke
                self.client.revoke_token_set(token_set)
         self.client.token_storage.clear_tokens(scopes)
 
+        if include_browser_logout:
+            if bool(os.environ.get('SSH_TTY') or os.environ.get('SSH_CONNECTION')):
+                print("Browser logout flow not available in a remote session. Visit %s to log out of Globus completely."
+                      % LOGOUT_URL)
+                return
+
+            webbrowser.open(LOGOUT_URL, new=1)
+
 
 class DerivaGlobusAuthUtilCLIException(Exception):
     def __init__(self, message):
@@ -1079,7 +1089,8 @@ def logout(args):
            self.gnl.logout(hosts=[args.host] if args.host else [],
                            requested_scopes=args.requested_scopes,
                            exclude_defaults=args.exclude_defaults,
-                           bdbag_keychain_file=args.bdbag_keychain_file)
+                           bdbag_keychain_file=args.bdbag_keychain_file,
+                           include_browser_logout=args.full)
            return "You have been logged out."
 
        parser = self.subparsers.add_parser("logout", help="Revoke and clear tokens. If no arguments are specified, "
@@ -1099,6 +1110,9 @@ def logout(args):
                                 "default set of scopes: [%s]" % ", ".join(DEFAULT_SCOPES))
        parser.add_argument('--bdbag-keychain-file', metavar='',
                            help="Non-default path to a bdbag keychain file.")
+       parser.add_argument('--full', action="store_true",
+                           help="Also launch the default system web browser to log out of Globus and clear cached "
+                                "identity-related browser cookies.")
        parser.set_defaults(func=logout)
 
    def user_info_init(self):
diff --git a/deriva/transfer/upload/deriva_upload.py b/deriva/transfer/upload/deriva_upload.py
index bb906b4b..bac5b53d 100644
--- a/deriva/transfer/upload/deriva_upload.py
+++ b/deriva/transfer/upload/deriva_upload.py
@@ -2,6 +2,7 @@
 import os
 import re
 import sys
+import datetime
 import errno
 import json
 import shutil
@@ -33,7 +34,8 @@ class Enum(tuple):
 UploadState = Enum(["Success", "Failed", "Pending", "Running", "Paused", "Aborted", "Cancelled", "Timeout"])
 FileUploadState = namedtuple("FileUploadState", ["State", "Status"])
 UploadMetadataReservedKeyNames = ["URI", "file_name", "file_ext", "file_size", "content-disposition", "md5", "sha256",
-                                  "md5_base64", "sha256_base64", "schema", "table", "target_table"]
+                                  "md5_base64", "sha256_base64", "schema", "table", "target_table", "_upload_year_",
+                                  "_upload_month_", "_upload_day_", "_upload_time_"]
 
 DefaultConfig = {
    "version_compatibility": [">=%s" % VERSION],
@@ -202,9 +204,9 @@ def setConfig(self, config_file):
        else:
            self._update_internal_config(read_config(config_file))
        if not self.isVersionCompatible():
-           raise DerivaRestoreError("Upload version incompatibility detected",
-                                    "Current version: [%s], required version(s): %s." %
-                                    (self.getVersion(), self.getVersionCompatibility()))
+           raise RuntimeError("Upload version incompatibility detected",
+                              "Current version: [%s], required version(s): %s." %
+                              (self.getVersion(), self.getVersionCompatibility()))
    @classmethod
    def getDefaultServer(cls):
        servers = cls.getServers()
@@ -258,7 +260,7 @@ def isFileNewer(src, dst):
        if not (os.path.isfile(src) and os.path.isfile(dst)):
            return False
 
-       # This comparison wont work with PyInstaller single-file bundles because the bundle is extracted to a temp dir
+       # This comparison won't work with PyInstaller single-file bundles because the bundle is extracted to a temp dir
        # and every timestamp for every file in the bundle is reset to the bundle extraction/creation time.
        if getattr(sys, 'frozen', False):
            prefix = os.path.sep + "_MEI"
@@ -578,7 +580,7 @@ def _uploadAsset(self, file_path, asset_mapping, match_groupdict, callback=None)
        self._queryFileMetadata(asset_mapping)
 
        # 5. If "create_record_before_upload" specified in asset_mapping, check for an existing record, creating a new
-       # one if necessary. Otherwise delay this logic until after the file upload.
+       # one if necessary. Otherwise, delay this logic until after the file upload.
        record = None
        if stob(asset_mapping.get("create_record_before_upload", False)):
            record = self._getFileRecord(asset_mapping)
@@ -599,7 +601,12 @@ def _uploadAsset(self, file_path, asset_mapping, match_groupdict, callback=None)
                                               allow_versioning=stob(hatrac_options.get("allow_versioning", True)),
                                               callback=callback)
            logger.debug("Hatrac upload successful. Result object URI: %s" % versioned_uri)
-           if stob(hatrac_options.get("versioned_uris", True)):
+           versioned_uris = True
+           if "versioned_uris" in hatrac_options:
+               versioned_uris = stob(hatrac_options.get("versioned_uris", True))
+           if "versioned_urls" in hatrac_options:
+               versioned_uris = stob(hatrac_options.get("versioned_urls", True))
+           if versioned_uris:
                self.metadata["URI"] = versioned_uri
            else:
                self.metadata["URI"] = versioned_uri.rsplit(":")[0]
@@ -694,6 +701,12 @@ def _initFileMetadata(self, file_path, asset_mapping, match_groupdict):
 
        self.metadata["file_name"] = self.getFileDisplayName(file_path)
        self.metadata["file_size"] = self.getFileSize(file_path)
+       time = datetime.datetime.now()
+       self.metadata["_upload_year_"] = time.year
+       self.metadata["_upload_month_"] = time.month
+       self.metadata["_upload_day_"] = time.day
+       self.metadata["_upload_time_"] = time.timestamp()
+
        self._urlEncodeMetadata(asset_mapping.get("url_encoding_safe_overrides"))
 
    def _updateFileMetadata(self, src, strict=False, no_overwrite=False):
@@ -707,7 +720,7 @@ def _updateFileMetadata(self, src, strict=False, no_overwrite=False):
                               "ignoring value: %s " % (k, src[k]))
                del dst[k]
                continue
-           # dont overwrite any existing metadata field
+           # don't overwrite any existing metadata field
            if no_overwrite:
                if k in self.metadata:
                    del dst[k]
@@ -1021,8 +1034,8 @@ def find_file_in_dir_hierarchy(filename, path):
    for root, dirs, files in walk(path, followlinks=True):
        if filename in files:
            # found a file
-           found_path = os.path.join(root, filename)
-           file_paths.add(os.path.realpath(found_path))
+           found_path = os.path.normcase(os.path.join(root, filename))
+           file_paths.add(os.path.normcase(os.path.realpath(found_path)))
            continue
 
    # Next, ascend from the base path looking for the same filename in all parent dirs
@@ -1033,7 +1046,7 @@ def find_file_in_dir_hierarchy(filename, path):
            break
        for entry in scandir(parent):
            if (entry.name == filename) and entry.is_file():
-               file_paths.add(os.path.realpath(entry.path))
+               file_paths.add(os.path.normcase(os.path.realpath(entry.path)))
        current = parent
 
    return file_paths
diff --git a/deriva/transfer/upload/processors/__init__.py b/deriva/transfer/upload/processors/__init__.py
index 150e210b..703aaabe 100644
--- a/deriva/transfer/upload/processors/__init__.py
+++ b/deriva/transfer/upload/processors/__init__.py
@@ -4,9 +4,11 @@
 from deriva.transfer.upload import DerivaUploadConfigurationError
 from deriva.transfer.upload.processors.base_processor import BaseProcessor
 from deriva.transfer.upload.processors.logging_processor import LoggingProcessor
+from deriva.transfer.upload.processors.metadata_update_processor import MetadataUpdateProcessor
 
 DEFAULT_PROCESSORS = {
-    "LoggingProcessor": LoggingProcessor
+    "LoggingProcessor": LoggingProcessor,
+    "MetadataProcessor": MetadataUpdateProcessor
 }
 
 
diff --git a/deriva/transfer/upload/processors/metadata_update_processor.py b/deriva/transfer/upload/processors/metadata_update_processor.py
new file mode 100644
index 00000000..88b85e4d
--- /dev/null
+++ b/deriva/transfer/upload/processors/metadata_update_processor.py
@@ -0,0 +1,30 @@
+import json
+import logging
+from deriva.transfer.upload.processors import BaseProcessor
+
+logger = logging.getLogger(__name__)
+
+
+class MetadataUpdateProcessor(BaseProcessor):
+    """
+    Upload processor used to load external JSON into a dict for use as upload metadata.
+ """ + def __init__(self, **kwargs): + super(MetadataUpdateProcessor, self).__init__(**kwargs) + self.metadata = kwargs.get("metadata", dict()) or dict + + def process(self): + input_file = self.parameters.get("input_file") + if not input_file: + return + + with open(input_file, "r") as input_data: + data = json.load(input_data) + if not isinstance(data, dict): + logger.warning("Type mismatch: expected dict object from loaded JSON file: %s" % input_file) + for key in data.keys(): + if key in self.metadata: + logger.warning( + "Duplicate key '%s' encountered in metadata input file [%s] existing key will be " + "overwritten." % (key, input_file)) + self.metadata.update(data)