Bulk upload bugfixes and enhancements.
Add option to launch web browser when using `deriva-globus-auth-utils logout` to complete Globus logout flow at https://app.globus.org/logout.
mikedarcy committed Apr 7, 2023
1 parent 1dc4e0c commit f5441d8
Showing 4 changed files with 74 additions and 15 deletions.
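
The headline change: `deriva-globus-auth-utils logout --full` now also opens https://app.globus.org/logout in the default browser. Below is a minimal sketch of driving the same flow from Python. The GlobusNativeLogin class name, its no-argument constructor, and the host value are assumptions; the diff only shows the logout() method itself.

# Sketch under the stated assumptions; the hosts value is a hypothetical example.
from deriva.core.utils.globus_auth_utils import GlobusNativeLogin

gnl = GlobusNativeLogin()
gnl.logout(hosts=["demo.derivacloud.org"],
           include_browser_logout=True)  # new keyword introduced by this commit
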
20 changes: 17 additions & 3 deletions deriva/core/utils/globus_auth_utils.py
@@ -8,6 +8,7 @@
import importlib
import datetime
import tzlocal
+import webbrowser
from pprint import pprint
from requests.exceptions import HTTPError, ConnectionError
from bdbag.fetch.auth import keychain as bdbkc
@@ -24,7 +25,7 @@
PUBLIC_GROUPS_API_URL = 'https://groups.api.globus.org/v2/groups/my_groups'
CLIENT_CRED_FILE = '/home/secrets/oauth2/client_secret_globus.json'
DEFAULT_SCOPES = ["openid", GROUPS_SCOPE_NAME]

+LOGOUT_URL = "https://app.globus.org/logout"

class UsageException(ValueError):
"""Usage exception.
@@ -710,7 +711,8 @@ def login(self,
self.update_bdbag_keychain(token=access_token, host=host, keychain_file=bdbag_keychain_file)
return tokens

-def logout(self, hosts=(), requested_scopes=(), exclude_defaults=False, bdbag_keychain_file=None):
+def logout(self, hosts=(), requested_scopes=(), exclude_defaults=False, bdbag_keychain_file=None,
+           include_browser_logout=False):
tokens = self.client._load_raw_tokens()

scopes = set(requested_scopes)
@@ -738,6 +740,14 @@ def logout(self, hosts=(), requested_scopes=(), exclude_defaults=False, bdbag_ke
self.client.revoke_token_set(token_set)
self.client.token_storage.clear_tokens(scopes)

+if include_browser_logout:
+    if bool(os.environ.get('SSH_TTY') or os.environ.get('SSH_CONNECTION')):
+        print("Browser logout flow not available with remote session. Visit %s to log out of Globus "
+              "completely." % LOGOUT_URL)
+        return
+
+    webbrowser.open(LOGOUT_URL, new=1)


class DerivaGlobusAuthUtilCLIException(Exception):
def __init__(self, message):
@@ -1079,7 +1089,8 @@ def logout(args):
self.gnl.logout(hosts=[args.host] if args.host else [],
requested_scopes=args.requested_scopes,
exclude_defaults=args.exclude_defaults,
-bdbag_keychain_file=args.bdbag_keychain_file)
+bdbag_keychain_file=args.bdbag_keychain_file,
+include_browser_logout=args.full)
return "You have been logged out."

parser = self.subparsers.add_parser("logout", help="Revoke and clear tokens. If no arguments are specified, "
@@ -1099,6 +1110,9 @@ def logout(args):
"default set of scopes: [%s]" % ", ".join(DEFAULT_SCOPES))
parser.add_argument('--bdbag-keychain-file', metavar='<file>',
help="Non-default path to a bdbag keychain file.")
+parser.add_argument('--full', action="store_true",
+                    help="Also launch the default system web browser to log out of Globus and clear cached "
+                         "identity-related browser cookies.")
parser.set_defaults(func=logout)

def user_info_init(self):
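
The remote-session guard above keys off the SSH_TTY/SSH_CONNECTION environment variables. A standalone sketch of the same check (stdlib only, not deriva-py code) that can be exercised in isolation:

import os
import webbrowser

LOGOUT_URL = "https://app.globus.org/logout"

def browser_logout():
    """Mirror of the guard above: skip the browser flow over SSH, where
    SSH_TTY/SSH_CONNECTION are set and no local browser is available."""
    if os.environ.get('SSH_TTY') or os.environ.get('SSH_CONNECTION'):
        print("Browser logout flow not available with remote session. "
              "Visit %s to log out of Globus completely." % LOGOUT_URL)
        return False
    # new=1 requests a new browser window where the platform supports it
    return webbrowser.open(LOGOUT_URL, new=1)
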
35 changes: 24 additions & 11 deletions deriva/transfer/upload/deriva_upload.py
@@ -2,6 +2,7 @@
import os
import re
import sys
+import datetime
import errno
import json
import shutil
@@ -33,7 +34,8 @@ class Enum(tuple):
UploadState = Enum(["Success", "Failed", "Pending", "Running", "Paused", "Aborted", "Cancelled", "Timeout"])
FileUploadState = namedtuple("FileUploadState", ["State", "Status"])
UploadMetadataReservedKeyNames = ["URI", "file_name", "file_ext", "file_size", "content-disposition", "md5", "sha256",
"md5_base64", "sha256_base64", "schema", "table", "target_table"]
"md5_base64", "sha256_base64", "schema", "table", "target_table", "_upload_year_",
"_upload_month_", "_upload_day_", "_upload_time_"]

DefaultConfig = {
"version_compatibility": [">=%s" % VERSION],
@@ -202,9 +204,9 @@ def setConfig(self, config_file):
else:
self._update_internal_config(read_config(config_file))
if not self.isVersionCompatible():
-raise DerivaRestoreError("Upload version incompatibility detected",
-                         "Current version: [%s], required version(s): %s." %
-                         (self.getVersion(), self.getVersionCompatibility()))
+raise RuntimeError("Upload version incompatibility detected",
+                   "Current version: [%s], required version(s): %s." %
+                   (self.getVersion(), self.getVersionCompatibility()))
@classmethod
def getDefaultServer(cls):
servers = cls.getServers()
@@ -258,7 +260,7 @@ def isFileNewer(src, dst):
if not (os.path.isfile(src) and os.path.isfile(dst)):
return False

-# This comparison wont work with PyInstaller single-file bundles because the bundle is extracted to a temp dir
+# This comparison won't work with PyInstaller single-file bundles because the bundle is extracted to a temp dir
# and every timestamp for every file in the bundle is reset to the bundle extraction/creation time.
if getattr(sys, 'frozen', False):
prefix = os.path.sep + "_MEI"
@@ -578,7 +580,7 @@ def _uploadAsset(self, file_path, asset_mapping, match_groupdict, callback=None)
self._queryFileMetadata(asset_mapping)

# 5. If "create_record_before_upload" specified in asset_mapping, check for an existing record, creating a new
-# one if necessary. Otherwise delay this logic until after the file upload.
+# one if necessary. Otherwise, delay this logic until after the file upload.
record = None
if stob(asset_mapping.get("create_record_before_upload", False)):
record = self._getFileRecord(asset_mapping)
@@ -599,7 +601,12 @@ def _uploadAsset(self, file_path, asset_mapping, match_groupdict, callback=None)
allow_versioning=stob(hatrac_options.get("allow_versioning", True)),
callback=callback)
logger.debug("Hatrac upload successful. Result object URI: %s" % versioned_uri)
if stob(hatrac_options.get("versioned_uris", True)):
versioned_uris = True
if "versioned_uris" in hatrac_options:
versioned_uris = stob(hatrac_options.get("versioned_uris", True))
if "versioned_urls" in hatrac_options:
versioned_uris = stob(hatrac_options.get("versioned_urls", True))
if versioned_uris:
self.metadata["URI"] = versioned_uri
else:
self.metadata["URI"] = versioned_uri.rsplit(":")[0]
Expand Down Expand Up @@ -694,6 +701,12 @@ def _initFileMetadata(self, file_path, asset_mapping, match_groupdict):
self.metadata["file_name"] = self.getFileDisplayName(file_path)
self.metadata["file_size"] = self.getFileSize(file_path)

+time = datetime.datetime.now()
+self.metadata["_upload_year_"] = time.year
+self.metadata["_upload_month_"] = time.month
+self.metadata["_upload_day_"] = time.day
+self.metadata["_upload_time_"] = time.timestamp()

self._urlEncodeMetadata(asset_mapping.get("url_encoding_safe_overrides"))

def _updateFileMetadata(self, src, strict=False, no_overwrite=False):
@@ -707,7 +720,7 @@ def _updateFileMetadata(self, src, strict=False, no_overwrite=False):
"ignoring value: %s " % (k, src[k]))
del dst[k]
continue
-# dont overwrite any existing metadata field
+# don't overwrite any existing metadata field
if no_overwrite:
if k in self.metadata:
del dst[k]
@@ -1021,8 +1034,8 @@ def find_file_in_dir_hierarchy(filename, path):
for root, dirs, files in walk(path, followlinks=True):
if filename in files:
# found a file
-found_path = os.path.join(root, filename)
-file_paths.add(os.path.realpath(found_path))
+found_path = os.path.normcase(os.path.join(root, filename))
+file_paths.add(os.path.normcase(os.path.realpath(found_path)))
continue

# Next, ascend from the base path looking for the same filename in all parent dirs
@@ -1033,7 +1046,7 @@ def find_file_in_dir_hierarchy(filename, path):
break
for entry in scandir(parent):
if (entry.name == filename) and entry.is_file():
-file_paths.add(os.path.realpath(entry.path))
+file_paths.add(os.path.normcase(os.path.realpath(os.path.normcase(entry.path))))
current = parent

return file_paths
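
The versioned-URI handling above accepts either spelling of the option, with "versioned_urls" taking precedence when both are present. A self-contained sketch of that precedence; stob here is a stand-in for deriva's string-to-boolean helper:

def stob(value):
    # Stand-in for deriva's string-to-boolean helper; the real one may
    # accept additional spellings.
    return str(value).strip().lower() in ("true", "1", "yes", "t", "y", "on")

def use_versioned_uri(hatrac_options):
    """Replicates the precedence above: default True, honor the legacy
    "versioned_uris" key, then let "versioned_urls" override it."""
    versioned = True
    if "versioned_uris" in hatrac_options:
        versioned = stob(hatrac_options["versioned_uris"])
    if "versioned_urls" in hatrac_options:
        versioned = stob(hatrac_options["versioned_urls"])
    return versioned

assert use_versioned_uri({}) is True
assert use_versioned_uri({"versioned_uris": "false"}) is False
assert use_versioned_uri({"versioned_uris": "false", "versioned_urls": "true"}) is True
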
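The new `_upload_*_` reserved keys expose the upload wall-clock time to metadata substitution. A hedged illustration of the kind of templating this enables; the template string and the `{...}` interpolation are assumptions about the upload configuration format, and the metadata values mirror _initFileMetadata() above:

import datetime

# Values as populated by _initFileMetadata() above
now = datetime.datetime.now()
metadata = {
    "file_name": "example.txt",        # hypothetical file
    "_upload_year_": now.year,
    "_upload_month_": now.month,
    "_upload_day_": now.day,
    "_upload_time_": now.timestamp(),  # POSIX timestamp as a float
}

# Hypothetical template using the new keys to bucket uploads by date
template = "/hatrac/data/{_upload_year_}/{_upload_month_}/{_upload_day_}/{file_name}"
print(template.format(**metadata))
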
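The normcase() wrapping in find_file_in_dir_hierarchy() above deduplicates hits that differ only by case, which matters on case-insensitive filesystems such as Windows and default macOS. A small illustration, not deriva-py code:

import os

paths = {r"C:\Data\Config.json", r"c:\data\config.json"}
print(len(paths))  # 2: differing case yields two distinct set entries
normalized = {os.path.normcase(p) for p in paths}
print(len(normalized))  # 1 on Windows; 2 on POSIX, where normcase is a no-op
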
4 changes: 3 additions & 1 deletion deriva/transfer/upload/processors/__init__.py
@@ -4,9 +4,11 @@
from deriva.transfer.upload import DerivaUploadConfigurationError
from deriva.transfer.upload.processors.base_processor import BaseProcessor
from deriva.transfer.upload.processors.logging_processor import LoggingProcessor
+from deriva.transfer.upload.processors.metadata_update_processor import MetadataUpdateProcessor

DEFAULT_PROCESSORS = {
"LoggingProcessor": LoggingProcessor
"LoggingProcessor": LoggingProcessor,
"MetadataProcessor": MetadataUpdateProcessor
}


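
DEFAULT_PROCESSORS is a plain name-to-class registry, and the config-facing name "MetadataProcessor" deliberately differs from the class name MetadataUpdateProcessor. A sketch of resolving and instantiating a processor by name; the constructor kwargs are illustrative, not the deriva-py API:

# Illustrative lookup against a DEFAULT_PROCESSORS-style registry; the
# metadata/parameters kwargs are assumptions about the processor contract.
def instantiate_processor(name, registry, **kwargs):
    cls = registry.get(name)
    if cls is None:
        raise ValueError("Unknown upload processor: %s" % name)
    return cls(**kwargs)

# processor = instantiate_processor("MetadataProcessor", DEFAULT_PROCESSORS,
#                                   metadata={}, parameters={"input_file": "meta.json"})
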
30 changes: 30 additions & 0 deletions deriva/transfer/upload/processors/metadata_update_processor.py
@@ -0,0 +1,30 @@
import json
import logging
from deriva.transfer.upload.processors import BaseProcessor

logger = logging.getLogger(__name__)


class MetadataUpdateProcessor(BaseProcessor):
"""
Upload processor used to load external JSON into a dict for use as upload metadata.
"""
def __init__(self, **kwargs):
super(MetadataUpdateProcessor, self).__init__(**kwargs)
self.metadata = kwargs.get("metadata", dict()) or dict

def process(self):
input_file = self.parameters.get("input_file")
if not input_file:
return

with open(input_file, "r") as input_data:
data = json.load(input_data)
if not isinstance(data, dict):
logger.warning("Type mismatch: expected dict object from loaded JSON file: %s" % input_file)
for key in data.keys():
if key in self.metadata:
logger.warning(
"Duplicate key '%s' encountered in metadata input file [%s] existing key will be "
"overwritten." % (key, input_file))
self.metadata.update(data)

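A self-contained demonstration of the merge behavior implemented above. It drives the same logic through a stub rather than the real BaseProcessor, whose constructor contract is not shown in this diff:

import json
import os
import tempfile

# Stub standing in for BaseProcessor (its constructor contract is an
# assumption); replicates the duplicate-key overwrite behavior above.
class StubMetadataProcessor:
    def __init__(self, **kwargs):
        self.metadata = kwargs.get("metadata") or dict()
        self.parameters = kwargs.get("parameters") or dict()

    def process(self):
        input_file = self.parameters.get("input_file")
        if not input_file:
            return
        with open(input_file, "r") as f:
            data = json.load(f)
        if isinstance(data, dict):
            self.metadata.update(data)  # later keys overwrite existing ones

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as tmp:
    json.dump({"file_name": "override.txt", "lab": "example-lab"}, tmp)

p = StubMetadataProcessor(metadata={"file_name": "original.txt"},
                          parameters={"input_file": tmp.name})
p.process()
print(p.metadata)  # {'file_name': 'override.txt', 'lab': 'example-lab'}
os.remove(tmp.name)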