diff --git a/datalad_next/config/__init__.py b/datalad_next/config/__init__.py
index aa17805c..37ee453a 100644
--- a/datalad_next/config/__init__.py
+++ b/datalad_next/config/__init__.py
@@ -1,6 +1,12 @@
 """Configuration query and manipulation
 
-This modules imports the central ``ConfigManager`` class from DataLad core.
+This module provides the central ``ConfigManager`` class.
+
+.. currentmodule:: datalad_next.config
+.. autosummary::
+   :toctree: generated
+
+   ConfigManager
 """
 
 from datalad.config import ConfigManager
diff --git a/datalad_next/consts/__init__.py b/datalad_next/consts/__init__.py
index 37a60f6b..2d2e4876 100644
--- a/datalad_next/consts/__init__.py
+++ b/datalad_next/consts/__init__.py
@@ -1,4 +1,19 @@
 """Common constants
+
+COPY_BUFSIZE
+  ``shutil`` buffer size default, with Windows platform default changes
+  backported from Python 3.10.
+
+PRE_INIT_COMMIT_SHA
+  SHA value for ``git hash-object -t tree /dev/null``, i.e. for nothing.
+  This corresponds to the state of a Git repository before the first commit
+  is made.
+
+on_linux
+  ``True`` if executed on the Linux platform.
+
+on_windows
+  ``True`` if executed on the Windows platform.
 """
 
 # import from "utils", but these really are constants
diff --git a/datalad_next/datasets/__init__.py b/datalad_next/datasets/__init__.py
index 7c8baf33..4dcdf079 100644
--- a/datalad_next/datasets/__init__.py
+++ b/datalad_next/datasets/__init__.py
@@ -15,70 +15,35 @@
 implements paradigms and behaviors that are no longer common to the rest of
 the DataLad API. :class:`LegacyGitRepo` and :class:`LegacyAnnexRepo` should
 no longer be used in new developments, and are not documented here.
-"""
-from pathlib import Path
+
+.. currentmodule:: datalad_next.datasets
+.. autosummary::
+   :toctree: generated
+
+   Dataset
+   LeanGitRepo
+   LeanAnnexRepo
+   LegacyGitRepo
+   LegacyAnnexRepo
+"""
 
 from datalad.distribution.dataset import (
     Dataset,
     # this does nothing but provide documentation
     # only kept here until this command is converted to
     # pre-call parameter validation
+    # TODO REMOVE FOR V2.0
     EnsureDataset as NoOpEnsureDataset,
+    # TODO REMOVE FOR V2.0
     datasetmethod,
+    # TODO REMOVE FOR V2.0
     resolve_path,
 )
 from datalad.dataset.gitrepo import GitRepo as LeanGitRepo
+from datalad.support.gitrepo import GitRepo as LegacyGitRepo
 from datalad.support.gitrepo import GitRepo as LegacyGitRepo
 from datalad.support.annexrepo import AnnexRepo as LegacyAnnexRepo
-
-
-class LeanAnnexRepo(LegacyAnnexRepo):
-    """git-annex repository representation with a minimized API
-
-    This is a companion of :class:`LeanGitRepo`. In the same spirit, it
-    restricts its API to a limited set of method that extend
-    :class:`LeanGitRepo`.
-
-    """
-    #CA .. autosummary::
-
-    #CA    call_annex
-    #CA    call_annex_oneline
-    #CA    call_annex_success
-    # list of attributes permitted in the "lean" API. This list extends
-    # the API of LeanGitRepo
-    # TODO extend whitelist of attributes as necessary
-    _lean_attrs = [
-        #CA # these are the ones we intend to provide
-        #CA 'call_annex',
-        #CA 'call_annex_oneline',
-        #CA 'call_annex_success',
-        # and here are the ones that we need to permit in order to get them
-        # to run
-        '_check_git_version',
-        #CA '_check_git_annex_version',
-        # used by AnnexRepo.__init__() -- should be using `is_valid()`
-        'is_valid_git',
-        'is_valid_annex',
-        '_is_direct_mode_from_config',
-        #CA '_call_annex',
-        #CA 'call_annex_items_',
-    ]
-
-    # intentionally limiting to just `path` as the only constructor argument
-    def __new__(cls, path: Path):
-        for attr in dir(cls):
-            if not hasattr(LeanGitRepo, attr) \
-                    and callable(getattr(cls, attr)) \
-                    and attr not in LeanAnnexRepo._lean_attrs:
-                setattr(cls, attr, _unsupported_method)
-
-        obj = super(LegacyAnnexRepo, cls).__new__(cls)
-
-        return obj
-
-
-def _unsupported_method(self, *args, **kwargs):
-    raise NotImplementedError('method unsupported by LeanAnnexRepo')
+from .annexrepo import LeanAnnexRepo
diff --git a/datalad_next/datasets/annexrepo.py b/datalad_next/datasets/annexrepo.py
new file mode 100644
index 00000000..26c51cc0
--- /dev/null
+++ b/datalad_next/datasets/annexrepo.py
@@ -0,0 +1,54 @@
+from pathlib import Path
+
+from datalad.dataset.gitrepo import GitRepo as LeanGitRepo
+from datalad.support.annexrepo import AnnexRepo as LegacyAnnexRepo
+
+
+class LeanAnnexRepo(LegacyAnnexRepo):
+    """git-annex repository representation with a minimized API
+
+    This is a companion of :class:`LeanGitRepo`. In the same spirit, it
+    restricts its API to a limited set of methods that extend
+    :class:`LeanGitRepo`.
+
+    """
+    #CA .. autosummary::
+
+    #CA    call_annex
+    #CA    call_annex_oneline
+    #CA    call_annex_success
+    # list of attributes permitted in the "lean" API. This list extends
+    # the API of LeanGitRepo
+    # TODO extend whitelist of attributes as necessary
+    _lean_attrs = [
+        #CA # these are the ones we intend to provide
+        #CA 'call_annex',
+        #CA 'call_annex_oneline',
+        #CA 'call_annex_success',
+        # and here are the ones that we need to permit in order to get them
+        # to run
+        '_check_git_version',
+        #CA '_check_git_annex_version',
+        # used by AnnexRepo.__init__() -- should be using `is_valid()`
+        'is_valid_git',
+        'is_valid_annex',
+        '_is_direct_mode_from_config',
+        #CA '_call_annex',
+        #CA 'call_annex_items_',
+    ]
+
+    # intentionally limiting to just `path` as the only constructor argument
+    def __new__(cls, path: Path):
+        for attr in dir(cls):
+            if not hasattr(LeanGitRepo, attr) \
+                    and callable(getattr(cls, attr)) \
+                    and attr not in LeanAnnexRepo._lean_attrs:
+                setattr(cls, attr, _unsupported_method)
+
+        obj = super(LegacyAnnexRepo, cls).__new__(cls)
+
+        return obj
+
+
+def _unsupported_method(self, *args, **kwargs):
+    raise NotImplementedError('method unsupported by LeanAnnexRepo')
diff --git a/datalad_next/exceptions/__init__.py b/datalad_next/exceptions/__init__.py
index 4ddb74d6..2e523514 100644
--- a/datalad_next/exceptions/__init__.py
+++ b/datalad_next/exceptions/__init__.py
@@ -1,4 +1,14 @@
-"""All custom exceptions used in datalad-next"""
+"""Special purpose exceptions
+
+.. currentmodule:: datalad_next.exceptions
+.. autosummary::
+   :toctree: generated
+
+   CapturedException
+   IncompleteResultsError
+   NoDatasetFound
+"""
+# we cannot have CommandError above, sphinx complains
 # TODO rethink the purpose of this module
 # and possibly
 # make it about *external* custom exceptions
@@ -9,6 +19,7 @@
     NoDatasetFound,
 )
 
+# TODO REMOVE FOR V2.0 (they are specific to that module)
 from datalad_next.url_operations import (
     UrlOperationsRemoteError,
     UrlOperationsAuthenticationError,
diff --git a/datalad_next/runners/__init__.py b/datalad_next/runners/__init__.py
index b00b186d..6d68faf3 100644
--- a/datalad_next/runners/__init__.py
+++ b/datalad_next/runners/__init__.py
@@ -55,12 +55,15 @@
 )
 
 # runners
+# TODO REMOVE FOR V2.0
 from datalad.runner import (
     GitRunner,
     Runner,
 )
+# TODO REMOVE FOR V2.0
 from datalad.runner.nonasyncrunner import ThreadedRunner
 
 # protocols
+# TODO REMOVE FOR V2.0
 from datalad.runner import (
     KillOutput,
     NoCapture,
@@ -69,7 +72,9 @@
     StdErrCapture,
     StdOutErrCapture,
 )
+# TODO REMOVE FOR V2.0
 from datalad.runner.protocol import GeneratorMixIn
+# TODO REMOVE FOR V2.0
 from .protocols import (
     NoCaptureGeneratorProtocol,
     StdOutCaptureGeneratorProtocol,
@@ -81,13 +86,16 @@
 from datalad_next.exceptions import CommandError
 
 # utilities
+# TODO REMOVE FOR V2.0
 from datalad.runner.nonasyncrunner import (
     STDOUT_FILENO,
     STDERR_FILENO,
 )
+# TODO REMOVE FOR V2.0
 from datalad.runner.utils import (
     LineSplitter,
 )
+# TODO REMOVE FOR V2.0
 from subprocess import (
     DEVNULL,
 )
diff --git a/datalad_next/uis/__init__.py b/datalad_next/uis/__init__.py
index a0ea9574..4c8c7565 100644
--- a/datalad_next/uis/__init__.py
+++ b/datalad_next/uis/__init__.py
@@ -1,6 +1,11 @@
 """UI abstractions for user communication
 
-This module imports all necessary components.
+.. currentmodule:: datalad_next.uis
+.. autosummary::
+   :toctree: generated
+
+   ansi_colors
+   ui_switcher
 """
 
 # make more obvious that this is a frontend that behaves
diff --git a/datalad_next/utils/__init__.py b/datalad_next/utils/__init__.py
index c8a570d3..6729d0cd 100644
--- a/datalad_next/utils/__init__.py
+++ b/datalad_next/utils/__init__.py
@@ -1,31 +1,62 @@
-"""Assorted utility functions"""
-
-import logging
-import os
-from typing import (
-    Any,
-    Dict,
-)
+"""Assorted utility functions
+
+.. currentmodule:: datalad_next.utils
+.. autosummary::
+   :toctree: generated
+
+   DataladAuth
+   MultiHash
+   check_symlink_capability
+   chpwd
+   ensure_list
+   external_versions
+   log_progress
+   parse_www_authenticate
+   rmtree
+
+   get_specialremote_param_dict
+   get_specialremote_credential_properties
+   update_specialremote_credential
+   needs_specialremote_credential_envpatch
+   get_specialremote_credential_envpatch
+"""
 
 from datalad.utils import (
+    # TODO REMOVE FOR V2.0
     Path,
     check_symlink_capability,
     chpwd,
+    # TODO REMOVE FOR V2.0
    ensure_bool,
     ensure_list,
+    # TODO https://github.com/datalad/datalad-next/issues/626
    get_dataset_root,
+    # TODO REMOVE FOR V2.0
+    # only needed for `interface_utils` patch and better be imported
+    # in there
    getargspec,
+    # TODO REMOVE FOR V2.0
    get_wrapped_class,
+    # TODO REMOVE FOR V2.0
    knows_annex,
+    # TODO REMOVE FOR V2.0
+    # in datalad_next.consts
    on_linux,
+    # TODO REMOVE FOR V2.0
+    # in datalad_next.consts
    on_windows,
+    # TODO REMOVE FOR V2.0
    rmtemp,
     rmtree,
+    # TODO REMOVE FOR V2.0
+    # only a test utility and should move there
    swallow_outputs,
 )
-
+# TODO REMOVE FOR V2.0
+# internal helper of create_sibling_webdav
 from datalad.distribution.utils import _yield_ds_w_matching_siblings
 from datalad.support.external_versions import external_versions
+# TODO REMOVE FOR V2.0
 from datalad_next.credman import CredentialManager
 from .log import log_progress
 from .multihash import MultiHash
@@ -33,180 +64,22 @@
     DataladAuth,
     parse_www_authenticate,
 )
-
-
-lgr = logging.getLogger('datalad.utils')
-
-
-def get_specialremote_param_dict(params):
-    """
-    Parameters
-    ----------
-    params : list
-
-    Returns
-    -------
-    dict
-    """
-    return dict(p.split('=', maxsplit=1) for p in params)
-
-
-def get_specialremote_credential_properties(params):
-    """Determine properties of credentials special remote configuration
-
-    The input is a parameterization as it would be given to
-    `git annex initremote|enableremote ...`, or as stored in
-    `remote.log`. These parameters are inspected and a dictionary
-    of credential properties, suitable for `CredentialManager.query()`
-    is returned. This inspection may involve network activity, e.g.
-    HTTP requests.
-
-    Parameters
-    ----------
-    params : list or dict
-      Either a list of strings of the format 'param=value', or a dictionary
-      with parameter names as keys.
-
-    Returns
-    -------
-    dict or None
-      Credential property name-value mapping. This mapping can be passed to
-      `CredentialManager.query()`. If no credential properties could be
-      inferred, for example, because the special remote type is not recognized
-      `None` is returned.
-    """
-    if isinstance(params, (list, tuple)):
-        params = get_specialremote_param_dict(params)
-
-    props = {}
-    # no other way to do this specifically for each supported remote type
-    remote_type = params.get('type')
-    if remote_type == 'webdav':
-        from .http_helpers import get_auth_realm
-        from datalad_next.url_operations import HttpUrlOperations
-        url = params.get('url')
-        if not url:
-            return
-        url, urlprops = HttpUrlOperations().probe_url(url)
-        realm = get_auth_realm(url, urlprops.get('auth'))
-        if realm:
-            props['realm'] = realm
-    else:
-        return
-
-    return props or None
-
-
-def update_specialremote_credential(
-        srtype, credman, credname, credprops, credtype_hint=None,
-        duplicate_hint=None):
-    """
-    Parameters
-    ----------
-    srtype: str
-    credman: CredentialManager
-    credname: str or Name
-    credprops: dict
-    """
-    if not credname:
-        # name could still be None, if this was entered
-        # create a default name, and check if it has not been used
-        credname = '{type}{udelim}{user}{delim}{realm}'.format(
-            type=srtype,
-            udelim='-' if 'user' in credprops else '',
-            user=credprops.get('user', ''),
-            delim='-' if 'realm' in credprops else '',
-            realm=credprops.get('realm', ''),
-        )
-        if credman.get(
-                name=credname,
-                # give to make legacy credentials accessible
-                _type_hint=credtype_hint):
-            # this is already in use, do not override
-            lgr.warning(
-                'The entered credential will not be stored, '
-                'a credential with the default name %r already exists.%s',
-                credname, f' {duplicate_hint}' if duplicate_hint else '')
-            return
-    # we have used a credential, store it with updated usage info
-    try:
-        credman.set(credname, _lastused=True, **credprops)
-    except Exception as e:
-        from datalad_next.exceptions import CapturedException
-
-        # we do not want to crash for any failure to store a
-        # credential
-        lgr.warn(
-            'Exception raised when storing credential %r %r: %s',
-            credname, credprops, CapturedException(e),
-        )
-
-
-# mapping for credential properties for specific special remote
-# types. this is unpleasantly non-generic, but only a small
-# subset of git-annex special remotes require credentials to be
-# given via ENV vars, and all of rclone's handle it internally
-specialremote_credential_envmap = dict(
-    # it makes no sense to pull a short-lived access token from
-    # a credential store, it can be given via AWS_SESSION_TOKEN
-    # in any case
-    glacier=dict(
-        user='AWS_ACCESS_KEY_ID',  # nosec
-        secret='AWS_SECRET_ACCESS_KEY'),  # nosec
-    s3=dict(
-        user='AWS_ACCESS_KEY_ID',  # nosec
-        secret='AWS_SECRET_ACCESS_KEY'),  # nosec
-    webdav=dict(
-        user='WEBDAV_USERNAME',  # nosec
-        secret='WEBDAV_PASSWORD'),  # nosec
-)
+from .specialremote import (
+    get_specialremote_param_dict,
+    get_specialremote_credential_properties,
+    update_specialremote_credential,
+    specialremote_credential_envmap,
+    needs_specialremote_credential_envpatch,
+    get_specialremote_credential_envpatch,
+)
+
+
-
-
-def needs_specialremote_credential_envpatch(remote_type):
-    """Returns whether the environment needs to be patched with credentials
-
-    Returns
-    -------
-    bool
-      False, if the special remote type is not recognized as one needing
-      credentials, or if there are credentials already present.
-      True, otherwise.
-    """
-    if remote_type not in specialremote_credential_envmap:
-        lgr.debug('Special remote type %r not supported for credential setup',
-                  remote_type)
-        return False
-
-    # retrieve deployment mapping
-    env_map = specialremote_credential_envmap[remote_type]
-    if all(k in os.environ for k in env_map.values()):
-        # the ENV is fully set up
-        # let's prefer the environment to behave like git-annex
-        lgr.debug(
-            'Not deploying credentials for special remote type %r, '
-            'already present in environment', remote_type)
-        return False
-
-    # no counterevidence
-    return True
-
-
-def get_specialremote_credential_envpatch(remote_type, cred):
-    """Create an environment path for a particular remote type and credential
-
-    Returns
-    -------
-    dict or None
-      A dict with all required items to patch the environment, or None
-      if not enough information is available, or nothing needs to be patched.
-    """
-    env_map = specialremote_credential_envmap.get(remote_type, {})
-    return {
-        # take whatever partial setup the ENV has already
-        v: cred[k]
-        for k, v in env_map.items()
-        if v not in os.environ
-    } or None
-
-
+# TODO REMOVE EVERYTHING BELOW FOR V2.0
+# https://github.com/datalad/datalad-next/issues/611
+from typing import (
+    Any,
+    Dict,
+)
+
+
 class ParamDictator:
diff --git a/datalad_next/utils/specialremote.py b/datalad_next/utils/specialremote.py
new file mode 100644
index 00000000..3336ce1c
--- /dev/null
+++ b/datalad_next/utils/specialremote.py
@@ -0,0 +1,175 @@
+import logging
+import os
+
+lgr = logging.getLogger('datalad.utils.specialremote')
+
+
+def get_specialremote_param_dict(params):
+    """
+    Parameters
+    ----------
+    params : list
+
+    Returns
+    -------
+    dict
+    """
+    return dict(p.split('=', maxsplit=1) for p in params)
+
+
+def get_specialremote_credential_properties(params):
+    """Determine properties of credentials special remote configuration
+
+    The input is a parameterization as it would be given to
+    `git annex initremote|enableremote ...`, or as stored in
+    `remote.log`. These parameters are inspected and a dictionary
+    of credential properties, suitable for `CredentialManager.query()`
+    is returned. This inspection may involve network activity, e.g.
+    HTTP requests.
+
+    Parameters
+    ----------
+    params : list or dict
+      Either a list of strings of the format 'param=value', or a dictionary
+      with parameter names as keys.
+
+    Returns
+    -------
+    dict or None
+      Credential property name-value mapping. This mapping can be passed to
+      `CredentialManager.query()`. If no credential properties could be
+      inferred, for example, because the special remote type is not recognized
+      `None` is returned.
+    """
+    if isinstance(params, (list, tuple)):
+        params = get_specialremote_param_dict(params)
+
+    props = {}
+    # no other way to do this specifically for each supported remote type
+    remote_type = params.get('type')
+    if remote_type == 'webdav':
+        from .http_helpers import get_auth_realm
+        from datalad_next.url_operations import HttpUrlOperations
+        url = params.get('url')
+        if not url:
+            return
+        url, urlprops = HttpUrlOperations().probe_url(url)
+        realm = get_auth_realm(url, urlprops.get('auth'))
+        if realm:
+            props['realm'] = realm
+    else:
+        return
+
+    return props or None
+
+
+def update_specialremote_credential(
+        srtype, credman, credname, credprops, credtype_hint=None,
+        duplicate_hint=None):
+    """
+    Parameters
+    ----------
+    srtype: str
+    credman: CredentialManager
+    credname: str or Name
+    credprops: dict
+    """
+    if not credname:
+        # name could still be None, if this was entered
+        # create a default name, and check if it has not been used
+        credname = '{type}{udelim}{user}{delim}{realm}'.format(
+            type=srtype,
+            udelim='-' if 'user' in credprops else '',
+            user=credprops.get('user', ''),
+            delim='-' if 'realm' in credprops else '',
+            realm=credprops.get('realm', ''),
+        )
+        if credman.get(
+                name=credname,
+                # give to make legacy credentials accessible
+                _type_hint=credtype_hint):
+            # this is already in use, do not override
+            lgr.warning(
+                'The entered credential will not be stored, '
+                'a credential with the default name %r already exists.%s',
+                credname, f' {duplicate_hint}' if duplicate_hint else '')
+            return
+    # we have used a credential, store it with updated usage info
+    try:
+        credman.set(credname, _lastused=True, **credprops)
+    except Exception as e:
+        from datalad_next.exceptions import CapturedException
+
+        # we do not want to crash for any failure to store a
+        # credential
+        lgr.warning(
+            'Exception raised when storing credential %r %r: %s',
+            credname, credprops, CapturedException(e),
+        )
+
+
+# mapping for credential properties for specific special remote
+# types. this is unpleasantly non-generic, but only a small
+# subset of git-annex special remotes require credentials to be
+# given via ENV vars, and all of rclone's handle it internally
+specialremote_credential_envmap = dict(
+    # it makes no sense to pull a short-lived access token from
+    # a credential store, it can be given via AWS_SESSION_TOKEN
+    # in any case
+    glacier=dict(
+        user='AWS_ACCESS_KEY_ID',  # nosec
+        secret='AWS_SECRET_ACCESS_KEY'),  # nosec
+    s3=dict(
+        user='AWS_ACCESS_KEY_ID',  # nosec
+        secret='AWS_SECRET_ACCESS_KEY'),  # nosec
+    webdav=dict(
+        user='WEBDAV_USERNAME',  # nosec
+        secret='WEBDAV_PASSWORD'),  # nosec
+)
+
+
+def needs_specialremote_credential_envpatch(remote_type):
+    """Returns whether the environment needs to be patched with credentials
+
+    Returns
+    -------
+    bool
+      False, if the special remote type is not recognized as one needing
+      credentials, or if there are credentials already present.
+      True, otherwise.
+    """
+    if remote_type not in specialremote_credential_envmap:
+        lgr.debug('Special remote type %r not supported for credential setup',
+                  remote_type)
+        return False
+
+    # retrieve deployment mapping
+    env_map = specialremote_credential_envmap[remote_type]
+    if all(k in os.environ for k in env_map.values()):
+        # the ENV is fully set up
+        # let's prefer the environment to behave like git-annex
+        lgr.debug(
+            'Not deploying credentials for special remote type %r, '
+            'already present in environment', remote_type)
+        return False
+
+    # no counterevidence
+    return True
+
+
+def get_specialremote_credential_envpatch(remote_type, cred):
+    """Create an environment patch for a particular remote type and credential
+
+    Returns
+    -------
+    dict or None
+      A dict with all required items to patch the environment, or None
+      if not enough information is available, or nothing needs to be patched.
+    """
+    env_map = specialremote_credential_envmap.get(remote_type, {})
+    return {
+        # take whatever partial setup the ENV has already
+        v: cred[k]
+        for k, v in env_map.items()
+        if v not in os.environ
+    } or None
diff --git a/docs/source/conf.py b/docs/source/conf.py
index aa0645d3..e4b0ea7c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,12 +26,6 @@
 
 import datalad_next
 
-# this cheats sphinx into thinking that LeanGit repo is not
-# merely imported, and convinces it to document it
-import datalad_next.datasets as dnd
-dnd.LeanGitRepo.__module__ = dnd.__name__
-dnd.LeanGitRepo.__name__ = 'LeanGitRepo'
-
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index fea42812..435f2bd6 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -30,8 +30,13 @@
 Doing so will enable the extension to also alter the behavior the core DataLad
 package and its commands.
 
-Provided functionality
-======================
+Functionality provided by DataLad NEXT
+======================================
+
+The following table of contents offers entry points to the main components
+provided by this extension. The `project README
+<https://github.com/datalad/datalad-next#readme>`__
+offers a more detailed summary in a different format.
 
 .. toctree::
    :maxdepth: 1
@@ -45,6 +50,48 @@
    patches.rst
 
 
+Developing with DataLad NEXT
+============================
+
+This extension package moves fast in comparison to the DataLad core package.
+Nevertheless, attention is paid to API stability, adequate semantic versioning,
+and informative changelogs.
+
+Besides the DataLad commands shipped with this extension package, a number of
+Python utilities are provided that facilitate the implementation of workflows
+and additional functionality. An overview is available in the
+:ref:`reference manual <pyutils>`.
+
+Public vs internal Python API
+-----------------------------
+
+Anything that can be imported directly from any of the top-level sub-packages in
+`datalad_next` is considered to be part of the public API. Changes to this API
+determine the versioning, and development is done with the aim to keep this API
+as stable as possible. This includes signatures and return value behavior.
+
+As an example::
+
+    from datalad_next.runners import iter_git_subproc
+
+imports a part of the public API, but::
+
+    from datalad_next.runners.git import iter_git_subproc
+
+does not.
+
+Use of the internal API
+-----------------------
+
+Developers can obviously use parts of the non-public API. However, this should
+only be done with the understanding that these components may change from one
+release to another, with no guarantee of transition periods, deprecation
+warnings, etc.
+
+Developers are advised to never reuse any components with names starting with
+`_` (underscore). Their use should be limited to their individual sub-package.
+
+
 Contributor information
 =======================