From 636ddb9210b82a7100900204ea70d0840cc71196 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 4 Sep 2022 23:54:39 +0100 Subject: [PATCH 01/72] Support accessing remote registries via ssh The URL has to be given as "ssh://[user@]host.xz[:port]/path/to/repo.git" rather than the shorter version "[user@]host.xz:path/to/repo.git" --- shpc/main/registry/provider.py | 10 ++++++++-- shpc/main/registry/remote.py | 6 +++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index c396c99bb..f138730ae 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -33,7 +33,11 @@ class Provider: """ def __init__(self, source, *args, **kwargs): - if not (source.startswith("https://") or os.path.exists(source)): + if not ( + source.startswith("https://") + or source.startswith("ssh://") + or os.path.exists(source) + ): raise ValueError( "Registry source must exist on the filesystem or be given as https://." ) @@ -44,7 +48,9 @@ def exists(self, name): @property def is_filesystem_registry(self): - return not self.source.startswith("http") and os.path.exists(self.source) + return not ( + self.source.startswith("http") or self.source.startswith("ssh") + ) and os.path.exists(self.source) @property def name(self): diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 6053a079d..c99801680 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -104,7 +104,9 @@ def __init__(self, *args, **kwargs): @classmethod def matches(cls, source): - return cls.provider_name in source and source.startswith("http") + return cls.provider_name in source and ( + source.startswith("http") or source.startswith("ssh") + ) @property def source_url(self): @@ -190,6 +192,8 @@ def _update_cache(self, force=False): if self._cache and not force: return + if self.source.startswith("ssh"): + return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages response = requests.get(self.web_url) if response.status_code != 200: From 5ca9a194f97ee9e67443afed0fe82850d2781772 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 18:31:48 +0100 Subject: [PATCH 02/72] Factored out a function that tells whether a registry path is local --- shpc/main/registry/__init__.py | 4 ++-- shpc/main/registry/provider.py | 6 +++--- shpc/main/registry/remote.py | 6 ++++++ shpc/main/settings.py | 12 ++++++------ 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index f98185c68..d7dd1e969 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -14,7 +14,7 @@ from shpc.main.settings import SettingsBase from .filesystem import Filesystem, FilesystemResult -from .remote import GitHub, GitLab, get_module_config_url +from .remote import GitHub, GitLab, get_module_config_url, is_path_local def update_container_module(module, from_path, existing_path): @@ -174,7 +174,7 @@ def sync_from_remote( local = Filesystem(self.settings.filesystem_registry) tmpdir = remote.source - if tmpdir.startswith("http") or not os.path.exists(tmpdir): + if not is_path_local(tmpdir): tmpdir = remote.clone() # These are modules to update diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index f138730ae..11bcbed5f 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -5,6 +5,8 @@ import os +from .remote import is_path_local + class Result: @property @@ -48,9 +50,7 @@ def exists(self, name): @property def is_filesystem_registry(self): - return not ( - self.source.startswith("http") or self.source.startswith("ssh") - ) and os.path.exists(self.source) + return is_path_local(self.source) @property def name(self): diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index c99801680..01e61f658 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -16,6 +16,12 @@ from .provider import Provider, Result +def is_path_local(path: str): + return not (path.startswith("http") or path.startswith("ssh")) and os.path.exists( + path + ) + + def get_module_config_url(registry, module_name, branch="main"): """ Get the raw address of the config (container.yaml) diff --git a/shpc/main/settings.py b/shpc/main/settings.py index b7d6ee401..1ea820ad2 100644 --- a/shpc/main/settings.py +++ b/shpc/main/settings.py @@ -5,6 +5,7 @@ import shutil +import shpc.main.registry import shpc.defaults as defaults import shpc.main.schemas import shpc.utils as utils @@ -247,9 +248,8 @@ def filesystem_registry(self): Return the first found filesystem registry """ for path in self.registry: - if path.startswith("http") or not os.path.exists(path): - continue - return path + if shpc.main.registry.is_path_local(path): + return path def ensure_filesystem_registry(self): """ @@ -257,9 +257,9 @@ def ensure_filesystem_registry(self): """ found = False for path in self.registry: - if path.startswith("http") or not os.path.exists(path): - continue - found = True + if shpc.main.registry.is_path_local(path): + found = True + break # Cut out early if registry isn't on the filesystem if not found: From 4cdc2069c13c5a97b867bdf624ff4e24c06420b2 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 18:44:27 +0100 Subject: [PATCH 03/72] Make sure the URL is used, not self.source which could be a local path --- shpc/main/registry/remote.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 01e61f658..36dfff6ea 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -106,7 +106,7 @@ def __init__(self, *args, **kwargs): # E.g., subdirectory with registry files self.subdir = kwargs.get("subdir") super().__init__(*args, **kwargs) - self._url = self.source + self.url = self.source @classmethod def matches(cls, source): @@ -119,7 +119,7 @@ def source_url(self): """ Retrieve a parsed / formatted url, ensuring https and without git. """ - url = self.source + url = self.url if not url.startswith("http"): url = "https://%s" % url if url.endswith(".git"): @@ -159,7 +159,7 @@ def clone(self, tmpdir=None): cmd = ["git", "clone", "--depth", "1"] if self.tag: cmd += ["-b", self.tag] - cmd += [self._url, tmpdir] + cmd += [self.url, tmpdir] self.source = tmpdir try: sp.run(cmd, check=True) @@ -198,7 +198,7 @@ def _update_cache(self, force=False): if self._cache and not force: return - if self.source.startswith("ssh"): + if self.url.startswith("ssh"): return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages response = requests.get(self.web_url) @@ -212,12 +212,12 @@ def _update_clone_cache(self): """ logger.warning( "Remote %s is not deploying a Registry API, falling back to clone." - % self.source + % self.url ) tmpdir = self.clone() for dirname, module in self.iter_modules(): # Minimum amount of metadata to function here - config_url = get_module_config_url(self.source, module) + config_url = get_module_config_url(self.url, module) self._cache[module] = { "config": shpc.utils.read_yaml( os.path.join(dirname, module, "container.yaml") From d5569d57973b8dc01af913866bdbf2bf3b6083db Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 18:47:03 +0100 Subject: [PATCH 04/72] Make sure the URL is used, not self.source which could be a local path --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 36dfff6ea..239de73d1 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -164,7 +164,7 @@ def clone(self, tmpdir=None): try: sp.run(cmd, check=True) except sp.CalledProcessError as e: - raise ValueError("Failed to clone repository {}:\n{}", self.source, e) + raise ValueError("Failed to clone repository {}:\n{}", self.url, e) return tmpdir def iter_modules(self): From 1e755a3294dac3931002d6260af3a7268e7ef6cb Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 18:47:27 +0100 Subject: [PATCH 05/72] Make self.source include the subdir from the start. Allows implementing iter_modules in the base class --- shpc/main/registry/filesystem.py | 7 ------- shpc/main/registry/provider.py | 10 +++++++++- shpc/main/registry/remote.py | 19 ++----------------- 3 files changed, 11 insertions(+), 25 deletions(-) diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 5c7b6f6e3..9dee71b56 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -83,13 +83,6 @@ def __init__(self, *args, **kwargs): def matches(cls, source): return os.path.exists(source) or source == "." - def iter_modules(self): - for filename in shpc.utils.recursive_find(self.source, "container.yaml"): - module = os.path.dirname(filename).replace(self.source, "").strip(os.sep) - if not module: - continue - yield self.source, module - def find(self, name): """ Find and load a container.yaml from the filesystem. diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 11bcbed5f..36209572b 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -70,4 +70,12 @@ def iter_registry(self): pass def iter_modules(self): - pass + """ + yield module names + """ + # Find modules based on container.yaml + for filename in shpc.utils.recursive_find(self.source, "container.yaml"): + module = os.path.dirname(filename).replace(self.source, "").strip(os.sep) + if not module: + continue + yield self.source, module diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 239de73d1..e291730a1 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -146,8 +146,6 @@ def exists(self, name): if self._cache and name in self._cache: return True dirname = self.source - if self.subdir: - dirname = os.path.join(dirname, self.subdir) return os.path.exists(os.path.join(dirname, name)) def clone(self, tmpdir=None): @@ -161,27 +159,14 @@ def clone(self, tmpdir=None): cmd += ["-b", self.tag] cmd += [self.url, tmpdir] self.source = tmpdir + if self.subdir: + self.source = os.path.join(tmpdir, self.subdir) try: sp.run(cmd, check=True) except sp.CalledProcessError as e: raise ValueError("Failed to clone repository {}:\n{}", self.url, e) return tmpdir - def iter_modules(self): - """ - yield module names - """ - dirname = self.source - if self.subdir: - dirname = os.path.join(dirname, self.subdir) - - # Find modules based on container.yaml - for filename in shpc.utils.recursive_find(dirname, "container.yaml"): - module = os.path.dirname(filename).replace(dirname, "").strip(os.sep) - if not module: - continue - yield dirname, module - def find(self, name): """ Find a particular entry in a registry From 724eee8617c2435422617a75ca1d4194b97269b5 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 19:20:58 +0100 Subject: [PATCH 06/72] bugfix: this should be the local registry --- shpc/main/registry/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index d7dd1e969..876819cf9 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -140,7 +140,7 @@ def _sync( if not local.is_filesystem_registry: logger.exit( "sync is only supported for a remote to a filesystem registry: %s" - % sync_registry.source + % local.source ) # Upgrade the current registry from the remote From bef33d6be2835c3285ea6700c56b424db78ef3ec Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 20:50:19 +0100 Subject: [PATCH 07/72] Made VersionControl generic enough to deal by itself with GitHub and GitLab Also supports all transport protocols Git understands. --- shpc/main/registry/__init__.py | 16 +++++----- shpc/main/registry/provider.py | 8 ++--- shpc/main/registry/remote.py | 54 +++++++++++++--------------------- shpc/tests/helpers.py | 4 +-- 4 files changed, 32 insertions(+), 50 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 876819cf9..271b9b4aa 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -14,7 +14,7 @@ from shpc.main.settings import SettingsBase from .filesystem import Filesystem, FilesystemResult -from .remote import GitHub, GitLab, get_module_config_url, is_path_local +from .remote import VersionControl, get_module_config_url, is_path_local def update_container_module(module, from_path, existing_path): @@ -84,7 +84,7 @@ def iter_modules(self): for registry, module in reg.iter_modules(): yield registry, module - def get_registry(self, source): + def get_registry(self, source, **kwargs): """ A registry is a local or remote registry. @@ -92,7 +92,7 @@ def get_registry(self, source): """ for Registry in PROVIDERS: if Registry.matches(source): - return Registry(source) + return Registry(source, **kwargs) raise ValueError("No matching registry provider for %s" % source) def sync( @@ -128,12 +128,10 @@ def _sync( local=None, sync_registry=None, ): - # Registry to sync from - sync_registry = sync_registry or self.settings.sync_registry - # Create a remote registry with settings preference - Remote = GitHub if "github.com" in sync_registry else GitLab - remote = Remote(sync_registry, tag=tag) + remote = self.get_registry( + sync_registry or self.settings.sync_registry, tag=tag + ) local = self.get_registry(local or self.settings.filesystem_registry) # We sync to our first registry - if not filesystem, no go @@ -206,4 +204,4 @@ def sync_from_remote( # We only currently allow Filesystem registries to be used in settings -PROVIDERS = [GitHub, Filesystem, GitLab] +PROVIDERS = [Filesystem, VersionControl] diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 36209572b..1af52eb3f 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -35,13 +35,9 @@ class Provider: """ def __init__(self, source, *args, **kwargs): - if not ( - source.startswith("https://") - or source.startswith("ssh://") - or os.path.exists(source) - ): + if not (("://" in source) or os.path.exists(source)): raise ValueError( - "Registry source must exist on the filesystem or be given as https://." + "Registry source must exist on the filesystem or contain '://' (remote path)." ) self.source = source diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index e291730a1..5d5516da9 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -17,9 +17,7 @@ def is_path_local(path: str): - return not (path.startswith("http") or path.startswith("ssh")) and os.path.exists( - path - ) + return ("://" not in path) and os.path.exists(path) def get_module_config_url(registry, module_name, branch="main"): @@ -97,6 +95,13 @@ def override_exists(self, tag): class VersionControl(Provider): + + # The URL is substituted with user, repo + library_url_schemes = { + "github.com": "https://%s.github.io/%s/library.json", + "gitlab.com": "https://%s.gitlab.io/%s/library.json", + } + def __init__(self, *args, **kwargs): self.tag = kwargs.get("tag") @@ -110,33 +115,23 @@ def __init__(self, *args, **kwargs): @classmethod def matches(cls, source): - return cls.provider_name in source and ( - source.startswith("http") or source.startswith("ssh") - ) + return "://" in source @property - def source_url(self): + def library_url(self): """ - Retrieve a parsed / formatted url, ensuring https and without git. + Retrieve the web url, either pages or (eventually) custom. """ url = self.url - if not url.startswith("http"): - url = "https://%s" % url if url.endswith(".git"): url = url[:-4] - return url - - @property - def web_url(self): - """ - Retrieve the web url, either pages or (eventually) custom. - """ - parts = self.source_url.split("/")[3:] - return "https://%s.%s.io/%s/library.json" % ( - parts[0], - self.provider_name, - "/".join(parts[1:]), - ) + parts = url.split("/") + domain = parts[2] + if domain in self.library_url_schemes: + return self.library_url_schemes[domain] % ( + parts[3], + "/".join(parts[4:]), + ) def exists(self, name): """ @@ -183,10 +178,11 @@ def _update_cache(self, force=False): if self._cache and not force: return - if self.url.startswith("ssh"): + library_url = self.library_url + if library_url is None: return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages - response = requests.get(self.web_url) + response = requests.get(library_url) if response.status_code != 200: return self._update_clone_cache() self._cache = response.json() @@ -225,11 +221,3 @@ def iter_registry(self, filter_string=None): continue # Assemble a faux config with tags so we don't hit remote yield RemoteResult(uri, entry, load=False, config=entry["config"]) - - -class GitHub(VersionControl): - provider_name = "github" - - -class GitLab(VersionControl): - provider_name = "gitlab" diff --git a/shpc/tests/helpers.py b/shpc/tests/helpers.py index 775bbdf70..a0372ff34 100644 --- a/shpc/tests/helpers.py +++ b/shpc/tests/helpers.py @@ -10,7 +10,7 @@ import shutil from shpc.main import get_client -from shpc.main.registry import GitHub +from shpc.main.registry import VersionControl here = os.path.dirname(os.path.abspath(__file__)) root = os.path.dirname(here) @@ -41,7 +41,7 @@ def init_client(tmpdir, module_sys, container_tech, remote=True): # If it's not remote, create a temporary filesystem registry if not remote: clone_dir = os.path.join(tmpdir, "registry") - reg = GitHub("https://github.com/singularityhub/shpc-registry") + reg = VersionControl("https://github.com/singularityhub/shpc-registry") reg.clone(clone_dir) client.settings.registry = [clone_dir] client.reload_registry() From ede973fbc38ec1250bd79074a2eb2a400ccf6f68 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 20:51:09 +0100 Subject: [PATCH 08/72] Not needed ? --- shpc/main/registry/provider.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 1af52eb3f..8ef76890d 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -48,10 +48,6 @@ def exists(self, name): def is_filesystem_registry(self): return is_path_local(self.source) - @property - def name(self): - return self.__class__.__name__.lower() - @classmethod def matches(cls, source_url: str): pass From 5d7087fe51d15e35c3b50c544eecd3b049fb1e79 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 20:53:54 +0100 Subject: [PATCH 09/72] Factored out and parametrised the code that builds the URL of the container.yaml --- shpc/main/modules/base.py | 6 ++--- shpc/main/registry/__init__.py | 2 +- shpc/main/registry/remote.py | 46 +++++++++++++++++++++------------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index 435a14c02..a467d8592 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -235,10 +235,8 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url - github_url = "%s/blob/%s/%s/container.yaml" % (registry, branch, module_name) - raw_github_url = shpc.main.registry.get_module_config_url( - registry, module_name, branch - ) + git_registry = shpc.main.registry.VersionControl(registry, tag=branch) + (raw_github_url, github_url) = git_registry.get_module_config_url(module_name) # Currently one doc is rendered for all containers result = template.render( diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 271b9b4aa..924e4bfd5 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -14,7 +14,7 @@ from shpc.main.settings import SettingsBase from .filesystem import Filesystem, FilesystemResult -from .remote import VersionControl, get_module_config_url, is_path_local +from .remote import VersionControl, is_path_local def update_container_module(module, from_path, existing_path): diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 5d5516da9..60de7cf8c 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -20,23 +20,6 @@ def is_path_local(path: str): return ("://" not in path) and os.path.exists(path) -def get_module_config_url(registry, module_name, branch="main"): - """ - Get the raw address of the config (container.yaml) - """ - registry_bare = registry.split(".com")[-1] - raw = ( - "https://gitlab.com/%s/-/raw/%s/%s/container.yaml" - if "gitlab" in registry - else "https://raw.githubusercontent.com/%s/%s/%s/container.yaml" - ) - return raw % ( - registry_bare, - branch, - module_name, - ) - - class RemoteResult(Result): """ A remote result provides courtesy functions for interacting with @@ -102,6 +85,18 @@ class VersionControl(Provider): "gitlab.com": "https://%s.gitlab.io/%s/library.json", } + # The URL is substituted with repo, branch, module_name + web_container_url_schemes = { + "github.com": "https://github.com/%s/blob/%s/%s/container.yaml", + "gitlab.com": "https://gitlab.com/%s/-/blob/%s/%s/container.yaml", + } + + # The URL is substituted with repo, branch, module_name + raw_container_url_schemes = { + "github.com": "https://raw.githubusercontent.com/%s/%s/%s/container.yaml", + "gitlab.com": "https://gitlab.com/%s/-/raw/%s/%s/container.yaml", + } + def __init__(self, *args, **kwargs): self.tag = kwargs.get("tag") @@ -198,7 +193,7 @@ def _update_clone_cache(self): tmpdir = self.clone() for dirname, module in self.iter_modules(): # Minimum amount of metadata to function here - config_url = get_module_config_url(self.url, module) + config_url = self.get_module_config_url(module)[0] self._cache[module] = { "config": shpc.utils.read_yaml( os.path.join(dirname, module, "container.yaml") @@ -207,6 +202,21 @@ def _update_clone_cache(self): } shutil.rmtree(tmpdir) + def get_module_config_url(self, module_name): + """ + Get the raw address of the config (container.yaml) + """ + parts = self.url.split("/") + domain = parts[2] + if domain in self.raw_container_url_schemes: + t = ("/".join(parts[3:]), self.tag, module_name) + return ( + self.raw_container_url_schemes[domain] % t, + self.web_container_url_schemes[domain] % t, + ) + else: + return (None, None) + def iter_registry(self, filter_string=None): """ Yield metadata about containers in a remote registry. From 1cadf597504ce3c6c6071b5aad191a620836555a Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 5 Sep 2022 20:59:15 +0100 Subject: [PATCH 10/72] The check already happens in _update_cache() --- shpc/main/registry/remote.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 60de7cf8c..bed35e9d1 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -161,8 +161,7 @@ def find(self, name): """ Find a particular entry in a registry """ - if not self._cache: - self._update_cache() + self._update_cache() if name in self._cache: return RemoteResult(name, self._cache[name]) From db2a14f66630171e70abbdaefe7565ed38a42795 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 21:29:18 +0100 Subject: [PATCH 11/72] Moved is_path_local to shpc.utils --- shpc/main/registry/__init__.py | 4 ++-- shpc/main/registry/provider.py | 4 ++-- shpc/main/registry/remote.py | 4 ---- shpc/main/settings.py | 5 ++--- shpc/utils/__init__.py | 1 + shpc/utils/fileio.py | 4 ++++ 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 924e4bfd5..ce353663f 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -14,7 +14,7 @@ from shpc.main.settings import SettingsBase from .filesystem import Filesystem, FilesystemResult -from .remote import VersionControl, is_path_local +from .remote import VersionControl def update_container_module(module, from_path, existing_path): @@ -172,7 +172,7 @@ def sync_from_remote( local = Filesystem(self.settings.filesystem_registry) tmpdir = remote.source - if not is_path_local(tmpdir): + if not shpc.utils.is_path_local(tmpdir): tmpdir = remote.clone() # These are modules to update diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 8ef76890d..ab2a9a653 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -5,7 +5,7 @@ import os -from .remote import is_path_local +import shpc.utils class Result: @@ -46,7 +46,7 @@ def exists(self, name): @property def is_filesystem_registry(self): - return is_path_local(self.source) + return shpc.utils.is_path_local(self.source) @classmethod def matches(cls, source_url: str): diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index bed35e9d1..32e4678e5 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -16,10 +16,6 @@ from .provider import Provider, Result -def is_path_local(path: str): - return ("://" not in path) and os.path.exists(path) - - class RemoteResult(Result): """ A remote result provides courtesy functions for interacting with diff --git a/shpc/main/settings.py b/shpc/main/settings.py index 1ea820ad2..1d6dbbac0 100644 --- a/shpc/main/settings.py +++ b/shpc/main/settings.py @@ -5,7 +5,6 @@ import shutil -import shpc.main.registry import shpc.defaults as defaults import shpc.main.schemas import shpc.utils as utils @@ -248,7 +247,7 @@ def filesystem_registry(self): Return the first found filesystem registry """ for path in self.registry: - if shpc.main.registry.is_path_local(path): + if utils.is_path_local(path): return path def ensure_filesystem_registry(self): @@ -257,7 +256,7 @@ def ensure_filesystem_registry(self): """ found = False for path in self.registry: - if shpc.main.registry.is_path_local(path): + if utils.is_path_local(path): found = True break diff --git a/shpc/utils/__init__.py b/shpc/utils/__init__.py index 1ee938a98..43db0d172 100644 --- a/shpc/utils/__init__.py +++ b/shpc/utils/__init__.py @@ -5,6 +5,7 @@ get_file_hash, get_tmpdir, get_tmpfile, + is_path_local, mkdir_p, mkdirp, print_json, diff --git a/shpc/utils/fileio.py b/shpc/utils/fileio.py index a1c4d23dd..25a272ccd 100644 --- a/shpc/utils/fileio.py +++ b/shpc/utils/fileio.py @@ -19,6 +19,10 @@ from ruamel.yaml import YAML +def is_path_local(path: str): + return ("://" not in path) and os.path.exists(path) + + def can_be_deleted(path, ignore_files=None): """ A path can be deleted if it contains no entries, *or* From 8c1ad754ae3c9d3ae8f9cf08c032b52e8cb6a765 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 22:47:00 +0100 Subject: [PATCH 12/72] Added a safeguard to prevent cloning multiple times --- shpc/main/registry/remote.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 32e4678e5..859390ef3 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -94,6 +94,8 @@ class VersionControl(Provider): } def __init__(self, *args, **kwargs): + self.is_cloned = False + self.tag = kwargs.get("tag") # Cache of remote container metadata @@ -138,6 +140,8 @@ def clone(self, tmpdir=None): """ Clone the known source URL to a temporary directory """ + if self.is_cloned: + return tmpdir = tmpdir or shpc.utils.get_tmpdir() cmd = ["git", "clone", "--depth", "1"] @@ -151,6 +155,7 @@ def clone(self, tmpdir=None): sp.run(cmd, check=True) except sp.CalledProcessError as e: raise ValueError("Failed to clone repository {}:\n{}", self.url, e) + self.is_cloned = True return tmpdir def find(self, name): From 06fd1a608ca35f57c01c9c8ad9f5cce059ab3df3 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 22:47:44 +0100 Subject: [PATCH 13/72] clone() is actually only supported by VersionControl --- shpc/main/registry/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index ce353663f..d6cdd8712 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -171,9 +171,8 @@ def sync_from_remote( if not local: local = Filesystem(self.settings.filesystem_registry) - tmpdir = remote.source - if not shpc.utils.is_path_local(tmpdir): - tmpdir = remote.clone() + if isinstance(remote, VersionControl) and not remote.is_cloned: + remote.clone() # These are modules to update for regpath, module in remote.iter_modules(): From 3d9cae26f4ddf1cf1cb053a68b32cc7cd0ad9ca6 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 22:49:46 +0100 Subject: [PATCH 14/72] The main Registry object, not the settings, should decide whether there is a local registry or not --- shpc/main/modules/base.py | 9 +++++++-- shpc/main/registry/__init__.py | 13 +++++++++++-- shpc/main/settings.py | 25 ------------------------- shpc/tests/test_sync.py | 10 +++++----- 4 files changed, 23 insertions(+), 34 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index a467d8592..0b0876120 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -172,7 +172,12 @@ def add(self, image, module_name=None, **kwargs): """ Add a container to the registry to enable install. """ - self.settings.ensure_filesystem_registry() + local_registry = self.registry.filesystem_registry + + if not local_registry: + logger.exit( + "This command is only supported for a filesystem registry! Add one or use --registry." + ) # Docker module name is always the same namespace as the image if image.startswith("docker"): @@ -185,7 +190,7 @@ def add(self, image, module_name=None, **kwargs): # Assume adding to default registry dest = os.path.join( - self.settings.filesystem_registry, + local_registry.source, module_name.split(":")[0], "container.yaml", ) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index d6cdd8712..b85e32e8a 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -44,6 +44,15 @@ def __init__(self, settings=None): # and they must exist. self.registries = [self.get_registry(r) for r in self.settings.registry] + @property + def filesystem_registry(self): + """ + Return the first found filesystem registry. + """ + for registry in self.registries: + if isinstance(registry, Filesystem): + return registry + def exists(self, name): """ Determine if a module name *exists* in any local registry, return path @@ -132,7 +141,7 @@ def _sync( remote = self.get_registry( sync_registry or self.settings.sync_registry, tag=tag ) - local = self.get_registry(local or self.settings.filesystem_registry) + local = self.get_registry(local) if local else self.filesystem_registry # We sync to our first registry - if not filesystem, no go if not local.is_filesystem_registry: @@ -169,7 +178,7 @@ def sync_from_remote( # No local registry provided, use default if not local: - local = Filesystem(self.settings.filesystem_registry) + local = self.filesystem_registry if isinstance(remote, VersionControl) and not remote.is_cloned: remote.clone() diff --git a/shpc/main/settings.py b/shpc/main/settings.py index 1d6dbbac0..892024866 100644 --- a/shpc/main/settings.py +++ b/shpc/main/settings.py @@ -241,31 +241,6 @@ def change_validate(self, key, value): "%s:%s cannot be added to config: %s" % (key, value, error.message) ) - @property - def filesystem_registry(self): - """ - Return the first found filesystem registry - """ - for path in self.registry: - if utils.is_path_local(path): - return path - - def ensure_filesystem_registry(self): - """ - Ensure that the settings has a filesystem registry. - """ - found = False - for path in self.registry: - if utils.is_path_local(path): - found = True - break - - # Cut out early if registry isn't on the filesystem - if not found: - logger.exit( - "This command is only supported for a filesystem registry! Add one or use --registry." - ) - def _substitutions(self, value): """ Given a value, make substitutions diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index 7008a8e26..ed387403e 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -33,12 +33,12 @@ def test_filesystem_upgrade(tmp_path): os.makedirs(registry_path) client.reload_registry() - assert client.settings.filesystem_registry == registry_path + local = client.registry.filesystem_registry + assert local + assert isinstance(local, registry.Filesystem) + assert local.source == registry_path - # Test interacting with local filesystem registry - local = registry.Filesystem(client.settings.filesystem_registry) - - # It should be empty + # Local filesystem registry should be empty assert not list(local.iter_modules()) # Create filesystem registry with test data From 54d45ad906f6af72b37067020d1bed39a4993e46 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 22:51:07 +0100 Subject: [PATCH 15/72] To avoid duplicating the code that assesses whether a path or local or not, check which sub-class of Provider is used --- shpc/main/registry/__init__.py | 2 +- shpc/main/registry/provider.py | 4 ---- shpc/tests/test_sync.py | 2 +- shpc/utils/__init__.py | 1 - shpc/utils/fileio.py | 4 ---- 5 files changed, 2 insertions(+), 11 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index b85e32e8a..5b97ec81e 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -144,7 +144,7 @@ def _sync( local = self.get_registry(local) if local else self.filesystem_registry # We sync to our first registry - if not filesystem, no go - if not local.is_filesystem_registry: + if not isinstance(local, Filesystem): logger.exit( "sync is only supported for a remote to a filesystem registry: %s" % local.source diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index ab2a9a653..b51d1b74f 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -44,10 +44,6 @@ def __init__(self, source, *args, **kwargs): def exists(self, name): return os.path.exists(os.path.join(self.source, name)) - @property - def is_filesystem_registry(self): - return shpc.utils.is_path_local(self.source) - @classmethod def matches(cls, source_url: str): pass diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index ed387403e..a9c28a92b 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -153,7 +153,7 @@ def test_registry_interaction(tmp_path, remote): client = init_client(str(tmp_path), "lmod", "singularity") reg = client.registry.get_registry(remote) - assert not reg.is_filesystem_registry + assert isinstance(reg, registry.VersionControl) # This will hit the underlying logic to list/show mods = list(reg.iter_registry()) diff --git a/shpc/utils/__init__.py b/shpc/utils/__init__.py index 43db0d172..1ee938a98 100644 --- a/shpc/utils/__init__.py +++ b/shpc/utils/__init__.py @@ -5,7 +5,6 @@ get_file_hash, get_tmpdir, get_tmpfile, - is_path_local, mkdir_p, mkdirp, print_json, diff --git a/shpc/utils/fileio.py b/shpc/utils/fileio.py index 25a272ccd..a1c4d23dd 100644 --- a/shpc/utils/fileio.py +++ b/shpc/utils/fileio.py @@ -19,10 +19,6 @@ from ruamel.yaml import YAML -def is_path_local(path: str): - return ("://" not in path) and os.path.exists(path) - - def can_be_deleted(path, ignore_files=None): """ A path can be deleted if it contains no entries, *or* From 59c01cc1a9da27b039cc3182dad695c8b32dff62 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 22:55:55 +0100 Subject: [PATCH 16/72] The parent class shouldn't know that much about the subclasses --- shpc/main/registry/filesystem.py | 10 +++++++--- shpc/main/registry/provider.py | 7 ------- shpc/main/registry/remote.py | 12 +++++++----- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 9dee71b56..12076be10 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -75,9 +75,13 @@ def override_exists(self, tag): class Filesystem(Provider): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.source = os.path.abspath(self.source) + def __init__(self, source): + if not self.matches(source): + raise ValueError( + "Filesystem registry source must exist on the filesystem. Got %" + % source + ) + self.source = os.path.abspath(source) @classmethod def matches(cls, source): diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index b51d1b74f..befef3469 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -34,13 +34,6 @@ class Provider: A general provider should retrieve and provide registry files. """ - def __init__(self, source, *args, **kwargs): - if not (("://" in source) or os.path.exists(source)): - raise ValueError( - "Registry source must exist on the filesystem or contain '://' (remote path)." - ) - self.source = source - def exists(self, name): return os.path.exists(os.path.join(self.source, name)) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 859390ef3..f33466035 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -93,18 +93,20 @@ class VersionControl(Provider): "gitlab.com": "https://gitlab.com/%s/-/raw/%s/%s/container.yaml", } - def __init__(self, *args, **kwargs): + def __init__(self, source, tag=None, subdir=None): + if "://" not in source: + raise ValueError("'%s' is not a valid URL." % source) + self.is_cloned = False - self.tag = kwargs.get("tag") + self.tag = tag # Cache of remote container metadata self._cache = {} # E.g., subdirectory with registry files - self.subdir = kwargs.get("subdir") - super().__init__(*args, **kwargs) - self.url = self.source + self.subdir = subdir + self.url = source @classmethod def matches(cls, source): From db48626f589a7b09258cf39ce7075a26e2880c29 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 6 Sep 2022 22:57:13 +0100 Subject: [PATCH 17/72] Restored back the automatic addition of https:// --- shpc/main/registry/remote.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index f33466035..15eab6d25 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -95,7 +95,18 @@ class VersionControl(Provider): def __init__(self, source, tag=None, subdir=None): if "://" not in source: - raise ValueError("'%s' is not a valid URL." % source) + if os.path.exists(source): + raise ValueError( + "VersionControl registry must be a remote path, not a local one." + ) + # Normalise the URL + # Heuristics: if there is a @, it's probably ssh + if "@" in self.url: + self.url = "ssh://" + source + else: + self.url = "https://" + source + else: + self.url = source self.is_cloned = False @@ -106,11 +117,10 @@ def __init__(self, source, tag=None, subdir=None): # E.g., subdirectory with registry files self.subdir = subdir - self.url = source @classmethod def matches(cls, source): - return "://" in source + return ("://" in source) or not os.path.exists(source) @property def library_url(self): From 583e7d4ed80d94296015908b76f1140fad43f9e4 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 13:23:41 +0100 Subject: [PATCH 18/72] Restructured to avoid an unnecessary else --- shpc/main/registry/remote.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 15eab6d25..d4cf167d1 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -94,6 +94,7 @@ class VersionControl(Provider): } def __init__(self, source, tag=None, subdir=None): + self.url = source if "://" not in source: if os.path.exists(source): raise ValueError( @@ -101,12 +102,10 @@ def __init__(self, source, tag=None, subdir=None): ) # Normalise the URL # Heuristics: if there is a @, it's probably ssh - if "@" in self.url: + if "@" in source: self.url = "ssh://" + source else: self.url = "https://" + source - else: - self.url = source self.is_cloned = False From bd67e13c6cd966d3bae4f7d9e1e3732b15b185f2 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 13:24:07 +0100 Subject: [PATCH 19/72] shpc convention: no else when the then ends with a return --- shpc/main/registry/remote.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index d4cf167d1..df94ee803 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -225,8 +225,7 @@ def get_module_config_url(self, module_name): self.raw_container_url_schemes[domain] % t, self.web_container_url_schemes[domain] % t, ) - else: - return (None, None) + return (None, None) def iter_registry(self, filter_string=None): """ From fb2ed7cf4b8264143674b953ec2ac1729f8de68b Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 13:29:34 +0100 Subject: [PATCH 20/72] Unnecessary due to operator precedence rule --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index df94ee803..28952447c 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -119,7 +119,7 @@ def __init__(self, source, tag=None, subdir=None): @classmethod def matches(cls, source): - return ("://" in source) or not os.path.exists(source) + return "://" in source or not os.path.exists(source) @property def library_url(self): From b54bbfea0095878a7ca300452cf0fbb38e2f1138 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 21:08:31 +0100 Subject: [PATCH 21/72] Added a cache in `library_url` --- shpc/main/registry/remote.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 28952447c..40c80884d 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -106,7 +106,7 @@ def __init__(self, source, tag=None, subdir=None): self.url = "ssh://" + source else: self.url = "https://" + source - + self._library_url = None self.is_cloned = False self.tag = tag @@ -126,16 +126,21 @@ def library_url(self): """ Retrieve the web url, either pages or (eventually) custom. """ + if self._library_url is not None: + return self._library_url url = self.url if url.endswith(".git"): url = url[:-4] parts = url.split("/") domain = parts[2] if domain in self.library_url_schemes: - return self.library_url_schemes[domain] % ( + self._library_url = self.library_url_schemes[domain] % ( parts[3], "/".join(parts[4:]), ) + else: + self._library_url = "" + return self._library_url def exists(self, name): """ @@ -184,11 +189,10 @@ def _update_cache(self, force=False): if self._cache and not force: return - library_url = self.library_url - if library_url is None: + if not self.library_url: return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages - response = requests.get(library_url) + response = requests.get(self.library_url) if response.status_code != 200: return self._update_clone_cache() self._cache = response.json() From 5c912a6c77c4769435252fb9a1548a5a4beeeeba Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 17:26:54 +0100 Subject: [PATCH 22/72] Fixed the implementation of the cache in VersionControl.exists --- shpc/main/registry/remote.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 40c80884d..7e47a8a45 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -147,10 +147,8 @@ def exists(self, name): Determine if a module exists in the registry. """ name = name.split(":")[0] - if self._cache and name in self._cache: - return True - dirname = self.source - return os.path.exists(os.path.join(dirname, name)) + self._update_cache() + return name in self._cache def clone(self, tmpdir=None): """ From 99a2d57674a84b1fe559a389f2c591c3c6b07ef9 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 17:38:55 +0100 Subject: [PATCH 23/72] exists has its own implementation in VersionControl, so this implementation is in fact specific to Filesystem --- shpc/main/registry/filesystem.py | 3 +++ shpc/main/registry/provider.py | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 12076be10..367ab70c7 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -87,6 +87,9 @@ def __init__(self, source): def matches(cls, source): return os.path.exists(source) or source == "." + def exists(self, name): + return os.path.exists(os.path.join(self.source, name)) + def find(self, name): """ Find and load a container.yaml from the filesystem. diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index befef3469..998f7aa28 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -34,9 +34,6 @@ class Provider: A general provider should retrieve and provide registry files. """ - def exists(self, name): - return os.path.exists(os.path.join(self.source, name)) - @classmethod def matches(cls, source_url: str): pass From 486f12b275dd510d376298ed0eca7997da409cf8 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 17:51:00 +0100 Subject: [PATCH 24/72] iter_registry is basically iter_modules with an extra filter --- shpc/main/registry/filesystem.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 367ab70c7..acd74e34d 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -110,14 +110,9 @@ def iter_registry(self, filter_string=None): """ Iterate over content in filesystem registry. """ - for filename in shpc.utils.recursive_find(self.source): - if not filename.endswith("container.yaml"): - continue - module_name = ( - os.path.dirname(filename).replace(self.source, "").strip(os.sep) - ) - + for dirname, module_name in self.iter_modules(): # If the user has provided a filter, honor it if filter_string and not re.search(filter_string, module_name): continue + filename = os.path.join(dirname, module_name) yield FilesystemResult(module_name, filename) From 0b65fa6112b54f9ddc97a68dd8a07606312c7dfa Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 18:35:18 +0100 Subject: [PATCH 25/72] Yield relative paths rather than full paths since *all* consumers need relative paths --- shpc/main/modules/base.py | 7 +++---- shpc/main/registry/__init__.py | 5 ++--- shpc/main/registry/provider.py | 2 +- shpc/utils/fileio.py | 8 +++++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index 0b0876120..fd94638f4 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -309,10 +309,9 @@ def _get_module_lookup(self, base, filename, pattern=None): A shared function to get a lookup of installed modules or registry entries """ modules = {} - for fullpath in utils.recursive_find(base, pattern): - if fullpath.endswith(filename): - module_name, version = os.path.dirname(fullpath).rsplit(os.sep, 1) - module_name = module_name.replace(base, "").strip(os.sep) + for relpath in utils.recursive_find(base, pattern): + if relpath.endswith(filename): + module_name, version = os.path.dirname(relpath).rsplit(os.sep, 1) if module_name not in modules: modules[module_name] = set() modules[module_name].add(version) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 5b97ec81e..a3d0ae746 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -23,13 +23,12 @@ def update_container_module(module, from_path, existing_path): """ if not os.path.exists(existing_path): shpc.utils.mkdir_p(existing_path) - for filename in shpc.utils.recursive_find(from_path): - relative_path = filename.replace(from_path, "").strip("/") + for relative_path in shpc.utils.recursive_find(from_path): to_path = os.path.join(existing_path, relative_path) if os.path.exists(to_path): shutil.rmtree(to_path) shpc.utils.mkdir_p(os.path.dirname(to_path)) - shutil.copy2(filename, to_path) + shutil.copy2(os.path.join(from_path, relative_path), to_path) class Registry: diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 998f7aa28..5da29d307 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -53,7 +53,7 @@ def iter_modules(self): """ # Find modules based on container.yaml for filename in shpc.utils.recursive_find(self.source, "container.yaml"): - module = os.path.dirname(filename).replace(self.source, "").strip(os.sep) + module = os.path.dirname(filename) if not module: continue yield self.source, module diff --git a/shpc/utils/fileio.py b/shpc/utils/fileio.py index a1c4d23dd..20dae1b80 100644 --- a/shpc/utils/fileio.py +++ b/shpc/utils/fileio.py @@ -129,12 +129,14 @@ def recursive_find(base, pattern=None): """ # We can identify modules by finding module.lua for root, folders, files in os.walk(base): + assert root.startswith(base) + subdir = root[len(base) + 1 :] for file in files: - fullpath = os.path.abspath(os.path.join(root, file)) + relpath = os.path.join(subdir, file) - if pattern and not re.search(pattern, fullpath): + if pattern and not re.search(pattern, relpath): continue - yield fullpath + yield relpath def get_file_hash(image_path, algorithm="sha256"): From b064026c4f0469a1adce96ebaced36dab460ffcc Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 19:03:51 +0100 Subject: [PATCH 26/72] Proper method to cleanup a clone Removed a cleanup() call that was expected to do nothing --- shpc/main/registry/__init__.py | 1 - shpc/main/registry/remote.py | 13 ++++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index a3d0ae746..86b68e62c 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -158,7 +158,6 @@ def _sync( add_new=add_new, local=local, ) - remote.cleanup() def sync_from_remote( self, remote, name=None, overwrite=False, dryrun=False, add_new=True, local=None diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 7e47a8a45..bf2192260 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -170,8 +170,19 @@ def clone(self, tmpdir=None): except sp.CalledProcessError as e: raise ValueError("Failed to clone repository {}:\n{}", self.url, e) self.is_cloned = True + assert os.path.exists(self.source) return tmpdir + def cleanup(self): + """ + Cleanup the temporary clone (this must be intentionally called) + """ + if not self.is_cloned: + return + shutil.rmtree(self.source) + self.is_cloned = False + delattr(self, "source") + def find(self, name): """ Find a particular entry in a registry @@ -213,7 +224,7 @@ def _update_clone_cache(self): ), "config_url": config_url, } - shutil.rmtree(tmpdir) + self.cleanup() def get_module_config_url(self, module_name): """ From 5d290f74695a92c8517ba2c4723a19e18fd38f23 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Wed, 7 Sep 2022 19:05:30 +0100 Subject: [PATCH 27/72] No need to yield self.source in iter_modules since it's constant and accessible from outside (and not all callers want it !) --- shpc/main/client.py | 2 +- shpc/main/registry/__init__.py | 4 ++-- shpc/main/registry/filesystem.py | 4 ++-- shpc/main/registry/provider.py | 2 +- shpc/main/registry/remote.py | 4 ++-- shpc/tests/test_sync.py | 3 +-- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/shpc/main/client.py b/shpc/main/client.py index 088424c72..71cff32a4 100644 --- a/shpc/main/client.py +++ b/shpc/main/client.py @@ -126,7 +126,7 @@ def update(self, name=None, dryrun=False, filters=None): if name: modules = [name] else: - modules = [x[1] for x in list(self.registry.iter_modules())] + modules = list(self.registry.iter_modules()) for module_name in modules: config = self._load_container(module_name) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 86b68e62c..28ecad0b7 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -182,11 +182,11 @@ def sync_from_remote( remote.clone() # These are modules to update - for regpath, module in remote.iter_modules(): + for module in remote.iter_modules(): if name and module != name: continue - from_path = os.path.join(regpath, module) + from_path = os.path.join(remote.source, module) existing_path = local.exists(module) # If we have an existing module and we want to replace all files diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index acd74e34d..62fe6b101 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -110,9 +110,9 @@ def iter_registry(self, filter_string=None): """ Iterate over content in filesystem registry. """ - for dirname, module_name in self.iter_modules(): + for module_name in self.iter_modules(): # If the user has provided a filter, honor it if filter_string and not re.search(filter_string, module_name): continue - filename = os.path.join(dirname, module_name) + filename = os.path.join(self.source, module_name) yield FilesystemResult(module_name, filename) diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 5da29d307..a9459eda7 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -56,4 +56,4 @@ def iter_modules(self): module = os.path.dirname(filename) if not module: continue - yield self.source, module + yield module diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index bf2192260..9eae8005b 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -215,12 +215,12 @@ def _update_clone_cache(self): % self.url ) tmpdir = self.clone() - for dirname, module in self.iter_modules(): + for module in self.iter_modules(): # Minimum amount of metadata to function here config_url = self.get_module_config_url(module)[0] self._cache[module] = { "config": shpc.utils.read_yaml( - os.path.join(dirname, module, "container.yaml") + os.path.join(tmpdir, module, "container.yaml") ), "config_url": config_url, } diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index a9c28a92b..0d7806120 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -49,8 +49,7 @@ def test_filesystem_upgrade(tmp_path): # Should have one module installed mods = list(test_registry.iter_modules()) assert len(mods) == 1 - module = mods[0][1] - assert mods[0][0] == test_registry_path + module = mods[0] assert module == "dinosaur/salad" # Upgrade the current registry from the "remote" (test registry) From 289000a3b77371b6a658ad64c1fcceb48b00f518 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:07:25 +0100 Subject: [PATCH 28/72] It's more practical to yield the the registry object (provider) rather than using the "source" path (which is undefined for remote registries anyway) --- shpc/main/registry/__init__.py | 8 ++++---- shpc/tests/test_sync.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 28ecad0b7..91167e521 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -54,11 +54,11 @@ def filesystem_registry(self): def exists(self, name): """ - Determine if a module name *exists* in any local registry, return path + Determine if a module name *exists* in any registry, return the first one """ for reg in self.registries: if reg.exists(name): - return os.path.join(reg.source, name) + return reg def iter_registry(self, filter_string=None): """ @@ -88,8 +88,8 @@ def iter_modules(self): """ Iterate over modules found across the registry """ - for reg in self.registries: - for registry, module in reg.iter_modules(): + for registry in self.registries: + for module in registry.iter_modules(): yield registry, module def get_registry(self, source, **kwargs): diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index 0d7806120..94fc4686f 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -57,7 +57,8 @@ def test_filesystem_upgrade(tmp_path): client.registry.sync_from_remote(test_registry, module) existing = client.registry.exists(module) assert existing is not None - assert os.path.exists(existing) + assert existing == local + assert os.path.exists(os.path.join(local.source, module)) def test_sync_from_file(tmp_path): From d356ab11db99f77204a5de5f409634524d7bffba Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 15:59:04 +0100 Subject: [PATCH 29/72] Optimised the "update all" mode by directly using Result objects from the registries Otherwise, it wastes time finding modules that we know are there --- shpc/main/client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/shpc/main/client.py b/shpc/main/client.py index 71cff32a4..0e215c58d 100644 --- a/shpc/main/client.py +++ b/shpc/main/client.py @@ -124,13 +124,12 @@ def update(self, name=None, dryrun=False, filters=None): """ # No name provided == "update all" if name: - modules = [name] - else: - modules = list(self.registry.iter_modules()) - - for module_name in modules: - config = self._load_container(module_name) + config = self._load_container(name) config.update(dryrun=dryrun, filters=filters) + else: + for result in self.registry.iter_registry(): + config = container.ContainerConfig(result) + config.update(dryrun=dryrun, filters=filters) def test( self, From 3d3e6b9253611350c4692d6c80832b0bcc0eeb06 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 15:56:36 +0100 Subject: [PATCH 30/72] syntactic sugar --- shpc/main/registry/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 91167e521..8eaa91920 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -65,8 +65,7 @@ def iter_registry(self, filter_string=None): Iterate over all known registries defined in settings. """ for reg in self.registries: - for entry in reg.iter_registry(filter_string=filter_string): - yield entry + yield from reg.iter_registry(filter_string=filter_string) def find(self, name, path=None): """ From 0e34cc3bcbb9cce39304c33f2910bc6b771b7c83 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:17:46 +0100 Subject: [PATCH 31/72] Optimised iter_modules method for remote registries (using the cache) --- shpc/main/registry/remote.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 9eae8005b..1a18f5493 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -14,6 +14,7 @@ from shpc.logger import logger from .provider import Provider, Result +from .filesystem import Filesystem class RemoteResult(Result): @@ -183,6 +184,13 @@ def cleanup(self): self.is_cloned = False delattr(self, "source") + def iter_modules(self): + """ + yield module names + """ + self._update_cache() + yield from self._cache.keys() + def find(self, name): """ Find a particular entry in a registry @@ -215,7 +223,7 @@ def _update_clone_cache(self): % self.url ) tmpdir = self.clone() - for module in self.iter_modules(): + for module in Filesystem(tmpdir).iter_modules(): # Minimum amount of metadata to function here config_url = self.get_module_config_url(module)[0] self._cache[module] = { From 700b9e303c9b5b9757919dbcf3a0a606a89aef16 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:16:27 +0100 Subject: [PATCH 32/72] Moved back iter_modules to Filesystem since VersionControl has its own, optimised, version --- shpc/main/registry/filesystem.py | 11 +++++++++++ shpc/main/registry/provider.py | 10 +--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 62fe6b101..99594c280 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -90,6 +90,17 @@ def matches(cls, source): def exists(self, name): return os.path.exists(os.path.join(self.source, name)) + def iter_modules(self): + """ + yield module names + """ + # Find modules based on container.yaml + for filename in shpc.utils.recursive_find(self.source, "container.yaml"): + module = os.path.dirname(filename) + if not module: + continue + yield module + def find(self, name): """ Find and load a container.yaml from the filesystem. diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index a9459eda7..1bca4d9da 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -48,12 +48,4 @@ def iter_registry(self): pass def iter_modules(self): - """ - yield module names - """ - # Find modules based on container.yaml - for filename in shpc.utils.recursive_find(self.source, "container.yaml"): - module = os.path.dirname(filename) - if not module: - continue - yield module + pass From 3666a978f5bb3c994ff9f20d50796b14c7423f54 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:08:56 +0100 Subject: [PATCH 33/72] Clones only ever exist within a function --- shpc/main/registry/__init__.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 8eaa91920..10a705c6f 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -177,8 +177,10 @@ def sync_from_remote( if not local: local = self.filesystem_registry - if isinstance(remote, VersionControl) and not remote.is_cloned: - remote.clone() + ini_remote = None + if isinstance(remote, VersionControl): + ini_remote = remote + remote = Filesystem(remote.clone()) # These are modules to update for module in remote.iter_modules(): @@ -204,6 +206,9 @@ def sync_from_remote( module, from_path, os.path.join(local.source, module) ) + if ini_remote: + ini_remote.cleanup() + if not updates: logger.info("There were no upgrades.") From 58b6af9815d73b79d42d6f31810ec133f0ee789c Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:19:34 +0100 Subject: [PATCH 34/72] Stopped using self.source in VersionControl, to avoid confusion with Filesystem --- shpc/main/registry/remote.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 1a18f5493..f432029b8 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -108,7 +108,7 @@ def __init__(self, source, tag=None, subdir=None): else: self.url = "https://" + source self._library_url = None - self.is_cloned = False + self._clone_dir = None self.tag = tag @@ -155,7 +155,7 @@ def clone(self, tmpdir=None): """ Clone the known source URL to a temporary directory """ - if self.is_cloned: + if self._clone_dir: return tmpdir = tmpdir or shpc.utils.get_tmpdir() @@ -163,26 +163,24 @@ def clone(self, tmpdir=None): if self.tag: cmd += ["-b", self.tag] cmd += [self.url, tmpdir] - self.source = tmpdir + self._clone_dir = tmpdir if self.subdir: - self.source = os.path.join(tmpdir, self.subdir) + self._clone_dir = os.path.join(tmpdir, self.subdir) try: sp.run(cmd, check=True) except sp.CalledProcessError as e: raise ValueError("Failed to clone repository {}:\n{}", self.url, e) - self.is_cloned = True - assert os.path.exists(self.source) + assert os.path.exists(self._clone_dir) return tmpdir def cleanup(self): """ Cleanup the temporary clone (this must be intentionally called) """ - if not self.is_cloned: + if not self._clone_dir: return - shutil.rmtree(self.source) - self.is_cloned = False - delattr(self, "source") + shutil.rmtree(self._clone_dir) + self._clone_dir = None def iter_modules(self): """ From 8190884763bec5e875682c94ada853ee6340344d Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:35:47 +0100 Subject: [PATCH 35/72] url, not source, is to be used for remote registries --- shpc/main/registry/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 10a705c6f..b51d5c38f 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -145,7 +145,7 @@ def _sync( if not isinstance(local, Filesystem): logger.exit( "sync is only supported for a remote to a filesystem registry: %s" - % local.source + % local.url ) # Upgrade the current registry from the remote From 98259c13f72c996f892c72fd1fe950c4c4de1f64 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Fri, 9 Sep 2022 16:50:15 +0100 Subject: [PATCH 36/72] Cannot do these heuristics as we need to report unexisting local paths --- shpc/main/registry/remote.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index f432029b8..613c5d153 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -95,18 +95,12 @@ class VersionControl(Provider): } def __init__(self, source, tag=None, subdir=None): - self.url = source + assert self.matches(source) if "://" not in source: - if os.path.exists(source): - raise ValueError( - "VersionControl registry must be a remote path, not a local one." - ) - # Normalise the URL - # Heuristics: if there is a @, it's probably ssh - if "@" in source: - self.url = "ssh://" + source - else: - self.url = "https://" + source + raise ValueError( + "VersionControl registry must be a remote path, got %s." % source + ) + self.url = source self._library_url = None self._clone_dir = None @@ -120,7 +114,7 @@ def __init__(self, source, tag=None, subdir=None): @classmethod def matches(cls, source): - return "://" in source or not os.path.exists(source) + return "://" in source @property def library_url(self): From 4797224e981ee00901aa16727dcc6c1cf0e5d721 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sat, 10 Sep 2022 10:39:45 +0100 Subject: [PATCH 37/72] str.split can limit the number of splits --- shpc/main/registry/remote.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 613c5d153..468f62dec 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -76,7 +76,7 @@ def override_exists(self, tag): class VersionControl(Provider): - # The URL is substituted with user, repo + # The URL is substituted with owner, repo library_url_schemes = { "github.com": "https://%s.github.io/%s/library.json", "gitlab.com": "https://%s.gitlab.io/%s/library.json", @@ -126,12 +126,11 @@ def library_url(self): url = self.url if url.endswith(".git"): url = url[:-4] - parts = url.split("/") - domain = parts[2] + (_, _, domain, owner, repo) = url.split("/", 4) if domain in self.library_url_schemes: self._library_url = self.library_url_schemes[domain] % ( - parts[3], - "/".join(parts[4:]), + owner, + repo, ) else: self._library_url = "" @@ -230,10 +229,9 @@ def get_module_config_url(self, module_name): """ Get the raw address of the config (container.yaml) """ - parts = self.url.split("/") - domain = parts[2] + (_, _, domain, repo_path) = self.url.split("/", 3) if domain in self.raw_container_url_schemes: - t = ("/".join(parts[3:]), self.tag, module_name) + t = (repo_path, self.tag, module_name) return ( self.raw_container_url_schemes[domain] % t, self.web_container_url_schemes[domain] % t, From bc240161ee4886e7f38760c643c50c5419b264ef Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 11 Sep 2022 11:52:37 +0100 Subject: [PATCH 38/72] Increased the symmetry to simplify the maintainability --- shpc/main/client.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/shpc/main/client.py b/shpc/main/client.py index 0e215c58d..8537a61b7 100644 --- a/shpc/main/client.py +++ b/shpc/main/client.py @@ -124,12 +124,17 @@ def update(self, name=None, dryrun=False, filters=None): """ # No name provided == "update all" if name: - config = self._load_container(name) - config.update(dryrun=dryrun, filters=filters) + # find the module in the registries. _load_container + # calls `container.ContainerConfig(result)` like below + configs = [self._load_container(name)] else: + # directly iterate over the content of the registry + configs = [] for result in self.registry.iter_registry(): - config = container.ContainerConfig(result) - config.update(dryrun=dryrun, filters=filters) + configs.append(container.ContainerConfig(result)) + # do the update + for config in configs: + config.update(dryrun=dryrun, filters=filters) def test( self, From 876823708f1e12ada4b7fa6f88d947923904e7c5 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sat, 17 Sep 2022 21:12:06 +0100 Subject: [PATCH 39/72] NotImplementedError is more useful than pass --- shpc/main/registry/provider.py | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 1bca4d9da..20e0b879e 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -35,17 +35,33 @@ class Provider: """ @classmethod - def matches(cls, source_url: str): - pass + def matches(cls, source): + """ + Returns true if this class understands the source + """ + raise NotImplementedError def find(self, name): - pass + """ + Returns a Result object if the module can be found in the registry + """ + raise NotImplementedError - def cleanup(self): - pass + def exists(self, name): + """ + Returns true if the module can be found in the registry + """ + raise NotImplementedError - def iter_registry(self): - pass + def iter_registry(self, filter_string=None): + """ + Iterates over the modules of this registry (that match the filte, if + provided) as Result instances + """ + raise NotImplementedError def iter_modules(self): - pass + """ + Iterates over the module names of this registry + """ + raise NotImplementedError From 4170484610d5559defc339adb77f9e03b9b35ca2 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sat, 17 Sep 2022 21:17:34 +0100 Subject: [PATCH 40/72] The tuplized version is not the preference here --- shpc/main/modules/base.py | 2 +- shpc/main/registry/remote.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index fd94638f4..8da30bdb0 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -241,7 +241,7 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): template = self.template.load("docs.md") registry = registry or defaults.github_url git_registry = shpc.main.registry.VersionControl(registry, tag=branch) - (raw_github_url, github_url) = git_registry.get_module_config_url(module_name) + raw_github_url, github_url = git_registry.get_module_config_url(module_name) # Currently one doc is rendered for all containers result = template.render( diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 468f62dec..b24555aa9 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -126,7 +126,7 @@ def library_url(self): url = self.url if url.endswith(".git"): url = url[:-4] - (_, _, domain, owner, repo) = url.split("/", 4) + _, _, domain, owner, repo = url.split("/", 4) if domain in self.library_url_schemes: self._library_url = self.library_url_schemes[domain] % ( owner, @@ -229,7 +229,7 @@ def get_module_config_url(self, module_name): """ Get the raw address of the config (container.yaml) """ - (_, _, domain, repo_path) = self.url.split("/", 3) + _, _, domain, repo_path = self.url.split("/", 3) if domain in self.raw_container_url_schemes: t = (repo_path, self.tag, module_name) return ( From be3793fec483e287cb8317cd339e5ac3065e99df Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sat, 17 Sep 2022 21:34:16 +0100 Subject: [PATCH 41/72] Easier to understand --- shpc/main/registry/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index b51d5c38f..88cf233bd 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -177,9 +177,9 @@ def sync_from_remote( if not local: local = self.filesystem_registry - ini_remote = None + need_cleanup = False if isinstance(remote, VersionControl): - ini_remote = remote + need_cleanup = True remote = Filesystem(remote.clone()) # These are modules to update @@ -206,8 +206,8 @@ def sync_from_remote( module, from_path, os.path.join(local.source, module) ) - if ini_remote: - ini_remote.cleanup() + if need_cleanup: + remote.cleanup() if not updates: logger.info("There were no upgrades.") From 468afdff2b31be47bc1f9b662e434ac196d2d563 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sat, 17 Sep 2022 22:00:40 +0100 Subject: [PATCH 42/72] Made the clone return a Filesystem object independent from VersionControl --- shpc/main/registry/__init__.py | 2 +- shpc/main/registry/remote.py | 28 ++++++++-------------------- 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 88cf233bd..d149c88d0 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -180,7 +180,7 @@ def sync_from_remote( need_cleanup = False if isinstance(remote, VersionControl): need_cleanup = True - remote = Filesystem(remote.clone()) + remote = remote.clone() # These are modules to update for module in remote.iter_modules(): diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index b24555aa9..c4a2a6239 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -102,7 +102,6 @@ def __init__(self, source, tag=None, subdir=None): ) self.url = source self._library_url = None - self._clone_dir = None self.tag = tag @@ -147,33 +146,22 @@ def exists(self, name): def clone(self, tmpdir=None): """ Clone the known source URL to a temporary directory + and return an equivalent local registry (Filesystem) """ - if self._clone_dir: - return tmpdir = tmpdir or shpc.utils.get_tmpdir() cmd = ["git", "clone", "--depth", "1"] if self.tag: cmd += ["-b", self.tag] cmd += [self.url, tmpdir] - self._clone_dir = tmpdir if self.subdir: - self._clone_dir = os.path.join(tmpdir, self.subdir) + tmpdir = os.path.join(tmpdir, self.subdir) try: sp.run(cmd, check=True) except sp.CalledProcessError as e: raise ValueError("Failed to clone repository {}:\n{}", self.url, e) - assert os.path.exists(self._clone_dir) - return tmpdir - - def cleanup(self): - """ - Cleanup the temporary clone (this must be intentionally called) - """ - if not self._clone_dir: - return - shutil.rmtree(self._clone_dir) - self._clone_dir = None + assert os.path.exists(tmpdir) + return Filesystem(tmpdir) def iter_modules(self): """ @@ -213,17 +201,17 @@ def _update_clone_cache(self): "Remote %s is not deploying a Registry API, falling back to clone." % self.url ) - tmpdir = self.clone() - for module in Filesystem(tmpdir).iter_modules(): + tmplocal = self.clone() + for module in tmplocal.iter_modules(): # Minimum amount of metadata to function here config_url = self.get_module_config_url(module)[0] self._cache[module] = { "config": shpc.utils.read_yaml( - os.path.join(tmpdir, module, "container.yaml") + os.path.join(tmplocal.source, module, "container.yaml") ), "config_url": config_url, } - self.cleanup() + tmplocal.cleanup() def get_module_config_url(self, module_name): """ From bc2fe99117c47d48819f9d57d26768c3b9770bc4 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sat, 5 Nov 2022 10:28:06 +0000 Subject: [PATCH 43/72] Extra comment --- shpc/main/registry/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index d149c88d0..ac6957257 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -179,6 +179,7 @@ def sync_from_remote( need_cleanup = False if isinstance(remote, VersionControl): + # Instantiate a local registry, which will have to be cleaned up need_cleanup = True remote = remote.clone() From b836cd80a094cb34b5498e2c3efb5a9c3567671e Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 20:17:02 +0000 Subject: [PATCH 44/72] Back to a subclass of VersionControl for each forge --- shpc/main/modules/base.py | 4 +- shpc/main/registry/__init__.py | 6 +- shpc/main/registry/remote.py | 117 +++++++++++++++++---------------- 3 files changed, 64 insertions(+), 63 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index 8da30bdb0..aa0c04f22 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -240,8 +240,8 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url - git_registry = shpc.main.registry.VersionControl(registry, tag=branch) - raw_github_url, github_url = git_registry.get_module_config_url(module_name) + github_url = git_registry.get_container_yaml_url(module_name) + raw_github_url = git_registry.get_raw_container_yaml_url(module_name) # Currently one doc is rendered for all containers result = template.render( diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index ac6957257..c7c2b4403 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -14,7 +14,7 @@ from shpc.main.settings import SettingsBase from .filesystem import Filesystem, FilesystemResult -from .remote import VersionControl +from .remote import GitHub, GitLab def update_container_module(module, from_path, existing_path): @@ -178,7 +178,7 @@ def sync_from_remote( local = self.filesystem_registry need_cleanup = False - if isinstance(remote, VersionControl): + if not isinstance(remote, Filesystem): # Instantiate a local registry, which will have to be cleaned up need_cleanup = True remote = remote.clone() @@ -215,4 +215,4 @@ def sync_from_remote( # We only currently allow Filesystem registries to be used in settings -PROVIDERS = [Filesystem, VersionControl] +PROVIDERS = [GitHub, Filesystem, GitLab] diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index c4a2a6239..ecf10d7fb 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -7,6 +7,7 @@ import re import shutil import subprocess as sp +import urllib import requests @@ -75,33 +76,12 @@ def override_exists(self, tag): class VersionControl(Provider): - - # The URL is substituted with owner, repo - library_url_schemes = { - "github.com": "https://%s.github.io/%s/library.json", - "gitlab.com": "https://%s.gitlab.io/%s/library.json", - } - - # The URL is substituted with repo, branch, module_name - web_container_url_schemes = { - "github.com": "https://github.com/%s/blob/%s/%s/container.yaml", - "gitlab.com": "https://gitlab.com/%s/-/blob/%s/%s/container.yaml", - } - - # The URL is substituted with repo, branch, module_name - raw_container_url_schemes = { - "github.com": "https://raw.githubusercontent.com/%s/%s/%s/container.yaml", - "gitlab.com": "https://gitlab.com/%s/-/raw/%s/%s/container.yaml", - } - def __init__(self, source, tag=None, subdir=None): - assert self.matches(source) - if "://" not in source: + if not self.matches(source): raise ValueError( - "VersionControl registry must be a remote path, got %s." % source + type(self).__name__ + "registry must be a remote path, got %s." % source ) self.url = source - self._library_url = None self.tag = tag @@ -111,29 +91,12 @@ def __init__(self, source, tag=None, subdir=None): # E.g., subdirectory with registry files self.subdir = subdir - @classmethod - def matches(cls, source): - return "://" in source - @property def library_url(self): """ - Retrieve the web url, either pages or (eventually) custom. + Retrieve the URL of this registry's library (in JSON). """ - if self._library_url is not None: - return self._library_url - url = self.url - if url.endswith(".git"): - url = url[:-4] - _, _, domain, owner, repo = url.split("/", 4) - if domain in self.library_url_schemes: - self._library_url = self.library_url_schemes[domain] % ( - owner, - repo, - ) - else: - self._library_url = "" - return self._library_url + raise NotImplementedError def exists(self, name): """ @@ -185,10 +148,11 @@ def _update_cache(self, force=False): if self._cache and not force: return - if not self.library_url: + library_url = self.library_url + if not library_url: return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages - response = requests.get(self.library_url) + response = requests.get(library_url) if response.status_code != 200: return self._update_clone_cache() self._cache = response.json() @@ -204,7 +168,7 @@ def _update_clone_cache(self): tmplocal = self.clone() for module in tmplocal.iter_modules(): # Minimum amount of metadata to function here - config_url = self.get_module_config_url(module)[0] + config_url = self.get_raw_container_yaml_url(module) self._cache[module] = { "config": shpc.utils.read_yaml( os.path.join(tmplocal.source, module, "container.yaml") @@ -213,19 +177,6 @@ def _update_clone_cache(self): } tmplocal.cleanup() - def get_module_config_url(self, module_name): - """ - Get the raw address of the config (container.yaml) - """ - _, _, domain, repo_path = self.url.split("/", 3) - if domain in self.raw_container_url_schemes: - t = (repo_path, self.tag, module_name) - return ( - self.raw_container_url_schemes[domain] % t, - self.web_container_url_schemes[domain] % t, - ) - return (None, None) - def iter_registry(self, filter_string=None): """ Yield metadata about containers in a remote registry. @@ -240,3 +191,53 @@ def iter_registry(self, filter_string=None): continue # Assemble a faux config with tags so we don't hit remote yield RemoteResult(uri, entry, load=False, config=entry["config"]) + + def get_container_yaml_url(self, module_name): + raise NotImplementedError + + def get_raw_container_yaml_url(self, module_name): + raise NotImplementedError + + +class GitHub(VersionControl): + @classmethod + def matches(cls, source): + return urllib.parse.urlparse(source).hostname == "github.com" + + @property + def library_url(self): + url = self.url + if url.endswith(".git"): + url = url[:-4] + _, _, _, owner, repo = url.split("/", 4) + return f"https://{owner}.github.io/{repo}/library.json" + + def get_container_yaml_url(self, module_name): + _, _, domain, repo_path = self.url.split("/", 3) + return f"https://github.com/{repo_path}/blob/{self.tag}/{module_name}/container.yaml" + + def get_raw_container_yaml_url(self, module_name): + _, _, domain, repo_path = self.url.split("/", 3) + return f"https://raw.githubusercontent.com/{repo_path}/{self.tag}/{module_name}/container.yaml" + + +class GitLab(VersionControl): + @classmethod + def matches(cls, source): + return urllib.parse.urlparse(source).hostname == "gitlab.com" + + @property + def library_url(self, owner, repo): + url = self.url + if url.endswith(".git"): + url = url[:-4] + _, _, _, owner, repo = url.split("/", 4) + return f"https://{owner}.gitlab.io/{repo}/library.json" + + def get_container_yaml_url(self, module_name): + _, _, domain, repo_path = self.url.split("/", 3) + return f"https://gitlab.com/{repo_path}/-/blob/{self.tag}/{module_name}/container.yaml" + + def get_raw_container_yaml_url(self, module_name): + _, _, domain, repo_path = self.url.split("/", 3) + return f"https://gitlab.com/{repo_path}/-/raw/{self.tag}/{module_name}/container.yaml" From e506ee392c014e811852eda4164834f2928b5c8c Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 20:58:41 +0000 Subject: [PATCH 45/72] Pre-parse the URL --- shpc/main/registry/remote.py | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index ecf10d7fb..c2acd9654 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -82,6 +82,7 @@ def __init__(self, source, tag=None, subdir=None): type(self).__name__ + "registry must be a remote path, got %s." % source ) self.url = source + self.parsed_url = urllib.parse.urlparse(source) self.tag = tag @@ -206,19 +207,17 @@ def matches(cls, source): @property def library_url(self): - url = self.url - if url.endswith(".git"): - url = url[:-4] - _, _, _, owner, repo = url.split("/", 4) + url_path = self.parsed_url.path.lstrip("/") + if url_path.endswith(".git"): + url_path = url_path[:-4] + owner, repo = url_path.split("/", 1) return f"https://{owner}.github.io/{repo}/library.json" def get_container_yaml_url(self, module_name): - _, _, domain, repo_path = self.url.split("/", 3) - return f"https://github.com/{repo_path}/blob/{self.tag}/{module_name}/container.yaml" + return f"https://github.com/{self.parsed_url.path}/blob/{self.tag}/{module_name}/container.yaml" def get_raw_container_yaml_url(self, module_name): - _, _, domain, repo_path = self.url.split("/", 3) - return f"https://raw.githubusercontent.com/{repo_path}/{self.tag}/{module_name}/container.yaml" + return f"https://raw.githubusercontent.com/{self.parsed_url.path}/{self.tag}/{module_name}/container.yaml" class GitLab(VersionControl): @@ -228,16 +227,14 @@ def matches(cls, source): @property def library_url(self, owner, repo): - url = self.url - if url.endswith(".git"): - url = url[:-4] - _, _, _, owner, repo = url.split("/", 4) + url_path = self.parsed_url.path.lstrip("/") + if url_path.endswith(".git"): + url_path = url_path[:-4] + owner, repo = url_path.split("/", 1) return f"https://{owner}.gitlab.io/{repo}/library.json" def get_container_yaml_url(self, module_name): - _, _, domain, repo_path = self.url.split("/", 3) - return f"https://gitlab.com/{repo_path}/-/blob/{self.tag}/{module_name}/container.yaml" + return f"https://gitlab.com/{self.parsed_url.path}/-/blob/{self.tag}/{module_name}/container.yaml" def get_raw_container_yaml_url(self, module_name): - _, _, domain, repo_path = self.url.split("/", 3) - return f"https://gitlab.com/{repo_path}/-/raw/{self.tag}/{module_name}/container.yaml" + return f"https://gitlab.com/{self.parsed_url.path}/-/raw/{self.tag}/{module_name}/container.yaml" From b263272836adfa9a0760757f6183aaf086bd4b19 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 21:18:09 +0000 Subject: [PATCH 46/72] VersionControl should not be directly used --- shpc/tests/helpers.py | 4 ++-- shpc/tests/test_sync.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/shpc/tests/helpers.py b/shpc/tests/helpers.py index a0372ff34..775bbdf70 100644 --- a/shpc/tests/helpers.py +++ b/shpc/tests/helpers.py @@ -10,7 +10,7 @@ import shutil from shpc.main import get_client -from shpc.main.registry import VersionControl +from shpc.main.registry import GitHub here = os.path.dirname(os.path.abspath(__file__)) root = os.path.dirname(here) @@ -41,7 +41,7 @@ def init_client(tmpdir, module_sys, container_tech, remote=True): # If it's not remote, create a temporary filesystem registry if not remote: clone_dir = os.path.join(tmpdir, "registry") - reg = VersionControl("https://github.com/singularityhub/shpc-registry") + reg = GitHub("https://github.com/singularityhub/shpc-registry") reg.clone(clone_dir) client.settings.registry = [clone_dir] client.reload_registry() diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index 94fc4686f..bb51b379c 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -153,7 +153,7 @@ def test_registry_interaction(tmp_path, remote): client = init_client(str(tmp_path), "lmod", "singularity") reg = client.registry.get_registry(remote) - assert isinstance(reg, registry.VersionControl) + assert isinstance(reg, registry.GitHub) # This will hit the underlying logic to list/show mods = list(reg.iter_registry()) From 9c03796feffcd8cd945e6c58334f555cc9bddf12 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 21:38:59 +0000 Subject: [PATCH 47/72] bugfix --- shpc/tests/test_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index bb51b379c..e542bd92e 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -153,7 +153,7 @@ def test_registry_interaction(tmp_path, remote): client = init_client(str(tmp_path), "lmod", "singularity") reg = client.registry.get_registry(remote) - assert isinstance(reg, registry.GitHub) + assert not isinstance(reg, registry.FileSystem) # This will hit the underlying logic to list/show mods = list(reg.iter_registry()) From b1a2ea896657b28c88fc38a9684aebc14bddb0cf Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 21:52:38 +0000 Subject: [PATCH 48/72] Forgot to build the registry object --- shpc/main/modules/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index aa0c04f22..edbeae928 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -240,6 +240,7 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url + git_registry = shpc.main.registry.get_registry(registry, tag=branch) github_url = git_registry.get_container_yaml_url(module_name) raw_github_url = git_registry.get_raw_container_yaml_url(module_name) From 492bcb3202537938feb47a2886c77251f6ba639f Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 21:52:44 +0000 Subject: [PATCH 49/72] typo --- shpc/tests/test_sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index e542bd92e..c41d5ad13 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -153,7 +153,7 @@ def test_registry_interaction(tmp_path, remote): client = init_client(str(tmp_path), "lmod", "singularity") reg = client.registry.get_registry(remote) - assert not isinstance(reg, registry.FileSystem) + assert not isinstance(reg, registry.Filesystem) # This will hit the underlying logic to list/show mods = list(reg.iter_registry()) From b6fbd509b6ff2bc5e7b80b70bacfc354d8afa9d6 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 22:07:50 +0000 Subject: [PATCH 50/72] Maybe this one will work ? --- shpc/main/modules/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index edbeae928..fa6326731 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -240,7 +240,7 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url - git_registry = shpc.main.registry.get_registry(registry, tag=branch) + git_registry = self.registry.get_registry(registry, tag=branch) github_url = git_registry.get_container_yaml_url(module_name) raw_github_url = git_registry.get_raw_container_yaml_url(module_name) From 8ff0c7784711564f8e5f6808ddcffb52671ecb97 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Sun, 6 Nov 2022 22:08:27 +0000 Subject: [PATCH 51/72] Forgot to change this signature --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index c2acd9654..355db5a25 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -226,7 +226,7 @@ def matches(cls, source): return urllib.parse.urlparse(source).hostname == "gitlab.com" @property - def library_url(self, owner, repo): + def library_url(self): url_path = self.parsed_url.path.lstrip("/") if url_path.endswith(".git"): url_path = url_path[:-4] From 153bd3937e3a581208a82a30719b0f4b42b74815 Mon Sep 17 00:00:00 2001 From: Vanessasaurus <814322+vsoch@users.noreply.github.com> Date: Mon, 7 Nov 2022 01:17:08 +0000 Subject: [PATCH 52/72] Renamed the variable for clarity --- shpc/main/modules/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index fa6326731..69f4fd2ae 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -240,9 +240,9 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url - git_registry = self.registry.get_registry(registry, tag=branch) - github_url = git_registry.get_container_yaml_url(module_name) - raw_github_url = git_registry.get_raw_container_yaml_url(module_name) + remote = self.registry.get_registry(registry, tag=branch) + github_url = remote.get_container_yaml_url(module_name) + raw_github_url = remote.get_raw_container_yaml_url(module_name) # Currently one doc is rendered for all containers result = template.render( From da752f31d47e3180944be2707c8497fee7cdc8e1 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 01:18:27 +0000 Subject: [PATCH 53/72] typo --- shpc/main/registry/filesystem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 99594c280..f8d46fb41 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -78,7 +78,7 @@ class Filesystem(Provider): def __init__(self, source): if not self.matches(source): raise ValueError( - "Filesystem registry source must exist on the filesystem. Got %" + "Filesystem registry source must exist on the filesystem. Got %s" % source ) self.source = os.path.abspath(source) From 653d467fab4117fab6d73ee52571c435a3826ba7 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 01:19:49 +0000 Subject: [PATCH 54/72] Removing yaml because it's the only file we have for a container --- shpc/main/modules/base.py | 4 ++-- shpc/main/registry/remote.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index 69f4fd2ae..51424e499 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -241,8 +241,8 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): template = self.template.load("docs.md") registry = registry or defaults.github_url remote = self.registry.get_registry(registry, tag=branch) - github_url = remote.get_container_yaml_url(module_name) - raw_github_url = remote.get_raw_container_yaml_url(module_name) + github_url = remote.get_container_url(module_name) + raw_github_url = remote.get_raw_container_url(module_name) # Currently one doc is rendered for all containers result = template.render( diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 355db5a25..3f9e9c28c 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -169,7 +169,7 @@ def _update_clone_cache(self): tmplocal = self.clone() for module in tmplocal.iter_modules(): # Minimum amount of metadata to function here - config_url = self.get_raw_container_yaml_url(module) + config_url = self.get_raw_container_url(module) self._cache[module] = { "config": shpc.utils.read_yaml( os.path.join(tmplocal.source, module, "container.yaml") @@ -193,10 +193,10 @@ def iter_registry(self, filter_string=None): # Assemble a faux config with tags so we don't hit remote yield RemoteResult(uri, entry, load=False, config=entry["config"]) - def get_container_yaml_url(self, module_name): + def get_container_url(self, module_name): raise NotImplementedError - def get_raw_container_yaml_url(self, module_name): + def get_raw_container_url(self, module_name): raise NotImplementedError @@ -213,10 +213,10 @@ def library_url(self): owner, repo = url_path.split("/", 1) return f"https://{owner}.github.io/{repo}/library.json" - def get_container_yaml_url(self, module_name): + def get_container_url(self, module_name): return f"https://github.com/{self.parsed_url.path}/blob/{self.tag}/{module_name}/container.yaml" - def get_raw_container_yaml_url(self, module_name): + def get_raw_container_url(self, module_name): return f"https://raw.githubusercontent.com/{self.parsed_url.path}/{self.tag}/{module_name}/container.yaml" @@ -233,8 +233,8 @@ def library_url(self): owner, repo = url_path.split("/", 1) return f"https://{owner}.gitlab.io/{repo}/library.json" - def get_container_yaml_url(self, module_name): + def get_container_url(self, module_name): return f"https://gitlab.com/{self.parsed_url.path}/-/blob/{self.tag}/{module_name}/container.yaml" - def get_raw_container_yaml_url(self, module_name): + def get_raw_container_url(self, module_name): return f"https://gitlab.com/{self.parsed_url.path}/-/raw/{self.tag}/{module_name}/container.yaml" From b71fb93c4ba92a22dd7666188606aa8a11d32011 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 01:27:28 +0000 Subject: [PATCH 55/72] Defensive programming: local could still be None --- shpc/main/registry/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index c7c2b4403..f4888f839 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -142,6 +142,8 @@ def _sync( local = self.get_registry(local) if local else self.filesystem_registry # We sync to our first registry - if not filesystem, no go + if not local: + logger.exit("No local registry to sync to") if not isinstance(local, Filesystem): logger.exit( "sync is only supported for a remote to a filesystem registry: %s" @@ -176,6 +178,8 @@ def sync_from_remote( # No local registry provided, use default if not local: local = self.filesystem_registry + if not local: + logger.exit("No local registry to sync to") need_cleanup = False if not isinstance(remote, Filesystem): From f069f4814454f6b6d5463faec06d373dbeb1124f Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 01:57:07 +0000 Subject: [PATCH 56/72] bugfix: iter_modules needs to yield paths to container.yaml --- shpc/main/registry/__init__.py | 1 + shpc/main/registry/filesystem.py | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index f4888f839..307d91d7b 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -189,6 +189,7 @@ def sync_from_remote( # These are modules to update for module in remote.iter_modules(): + module = os.path.dirname(module) if name and module != name: continue diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index f8d46fb41..8cc1a87d1 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -95,11 +95,7 @@ def iter_modules(self): yield module names """ # Find modules based on container.yaml - for filename in shpc.utils.recursive_find(self.source, "container.yaml"): - module = os.path.dirname(filename) - if not module: - continue - yield module + yield from shpc.utils.recursive_find(self.source, "container.yaml") def find(self, name): """ From ccdab16b59071839e42a0ee70839699ddf511a65 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 02:52:31 +0000 Subject: [PATCH 57/72] Moved the cleanup call up to _sync() --- shpc/main/registry/__init__.py | 8 +++----- shpc/main/registry/provider.py | 6 ++++++ shpc/main/registry/remote.py | 16 ++++++++++++++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 307d91d7b..81c0fedad 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -160,6 +160,9 @@ def _sync( local=local, ) + # Cleanup the remote once we've done the sync + remote.cleanup() + def sync_from_remote( self, remote, name=None, overwrite=False, dryrun=False, add_new=True, local=None ): @@ -181,10 +184,8 @@ def sync_from_remote( if not local: logger.exit("No local registry to sync to") - need_cleanup = False if not isinstance(remote, Filesystem): # Instantiate a local registry, which will have to be cleaned up - need_cleanup = True remote = remote.clone() # These are modules to update @@ -212,9 +213,6 @@ def sync_from_remote( module, from_path, os.path.join(local.source, module) ) - if need_cleanup: - remote.cleanup() - if not updates: logger.info("There were no upgrades.") diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index 20e0b879e..c2aa1dc6c 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -53,6 +53,12 @@ def exists(self, name): """ raise NotImplementedError + def cleanup(self): + """ + Cleanup the registry + """ + raise NotImplementedError + def iter_registry(self, filter_string=None): """ Iterates over the modules of this registry (that match the filte, if diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 3f9e9c28c..523921c50 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -83,6 +83,7 @@ def __init__(self, source, tag=None, subdir=None): ) self.url = source self.parsed_url = urllib.parse.urlparse(source) + self._clone = None self.tag = tag @@ -112,6 +113,8 @@ def clone(self, tmpdir=None): Clone the known source URL to a temporary directory and return an equivalent local registry (Filesystem) """ + if self._clone: + return self._clone tmpdir = tmpdir or shpc.utils.get_tmpdir() cmd = ["git", "clone", "--depth", "1"] @@ -125,7 +128,16 @@ def clone(self, tmpdir=None): except sp.CalledProcessError as e: raise ValueError("Failed to clone repository {}:\n{}", self.url, e) assert os.path.exists(tmpdir) - return Filesystem(tmpdir) + self._clone = Filesystem(tmpdir) + return self._clone + + def cleanup(self): + """ + Cleanup the registry + """ + if self._clone: + self._clone.cleanup() + self._clone = None def iter_modules(self): """ @@ -176,7 +188,7 @@ def _update_clone_cache(self): ), "config_url": config_url, } - tmplocal.cleanup() + self.cleanup() def iter_registry(self, filter_string=None): """ From c5b4cb99464784827477a927f1cd168d2af9b900 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 03:10:05 +0000 Subject: [PATCH 58/72] bugfix: iter_modules now returns path to the container.yaml --- shpc/main/registry/remote.py | 2 +- shpc/tests/test_sync.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 523921c50..d471e5dc7 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -184,7 +184,7 @@ def _update_clone_cache(self): config_url = self.get_raw_container_url(module) self._cache[module] = { "config": shpc.utils.read_yaml( - os.path.join(tmplocal.source, module, "container.yaml") + os.path.join(tmplocal.source, module) ), "config_url": config_url, } diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index c41d5ad13..f70e1ea40 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -50,7 +50,7 @@ def test_filesystem_upgrade(tmp_path): mods = list(test_registry.iter_modules()) assert len(mods) == 1 module = mods[0] - assert module == "dinosaur/salad" + assert module == "dinosaur/salad/container.yaml" # Upgrade the current registry from the "remote" (test registry) assert not client.registry.exists(module) From 9402e11d2c478ad0eda5afb4e5ff28008d7f9987 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 03:27:09 +0000 Subject: [PATCH 59/72] Revert "bugfix: iter_modules needs to yield paths to container.yaml" This reverts commit f069f4814454f6b6d5463faec06d373dbeb1124f. --- shpc/main/registry/__init__.py | 1 - shpc/main/registry/filesystem.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 81c0fedad..cb2fe9968 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -190,7 +190,6 @@ def sync_from_remote( # These are modules to update for module in remote.iter_modules(): - module = os.path.dirname(module) if name and module != name: continue diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 8cc1a87d1..f8d46fb41 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -95,7 +95,11 @@ def iter_modules(self): yield module names """ # Find modules based on container.yaml - yield from shpc.utils.recursive_find(self.source, "container.yaml") + for filename in shpc.utils.recursive_find(self.source, "container.yaml"): + module = os.path.dirname(filename) + if not module: + continue + yield module def find(self, name): """ From 687c076a85bed9215f6805bb632f62a5271bb720 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 03:27:19 +0000 Subject: [PATCH 60/72] Revert "bugfix: iter_modules now returns path to the container.yaml" This reverts commit c5b4cb99464784827477a927f1cd168d2af9b900. --- shpc/main/registry/remote.py | 2 +- shpc/tests/test_sync.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index d471e5dc7..523921c50 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -184,7 +184,7 @@ def _update_clone_cache(self): config_url = self.get_raw_container_url(module) self._cache[module] = { "config": shpc.utils.read_yaml( - os.path.join(tmplocal.source, module) + os.path.join(tmplocal.source, module, "container.yaml") ), "config_url": config_url, } diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index f70e1ea40..c41d5ad13 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -50,7 +50,7 @@ def test_filesystem_upgrade(tmp_path): mods = list(test_registry.iter_modules()) assert len(mods) == 1 module = mods[0] - assert module == "dinosaur/salad/container.yaml" + assert module == "dinosaur/salad" # Upgrade the current registry from the "remote" (test registry) assert not client.registry.exists(module) From e744af249b0c2a1492deb0697a0ac55790c310f9 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 17:30:19 +0000 Subject: [PATCH 61/72] The temp directory may have been deleted in the meantime Co-authored-by: Vanessasaurus <814322+vsoch@users.noreply.github.com> --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 523921c50..7a97593d0 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -135,7 +135,7 @@ def cleanup(self): """ Cleanup the registry """ - if self._clone: + if self._clone and os.path.exists(self._clone): self._clone.cleanup() self._clone = None From e37a252540d39123207495170ff02794cc769ba8 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 18:14:26 +0000 Subject: [PATCH 62/72] Need to check here too that the clone still exists --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 7a97593d0..aecf75067 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -113,7 +113,7 @@ def clone(self, tmpdir=None): Clone the known source URL to a temporary directory and return an equivalent local registry (Filesystem) """ - if self._clone: + if self._clone and os.path.exists(self._clone): return self._clone tmpdir = tmpdir or shpc.utils.get_tmpdir() From ca5b632e90e2ff0732090b9d4ae0ab921eda7339 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 18:14:56 +0000 Subject: [PATCH 63/72] Also need to reset self._clone if the directory is gone --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index aecf75067..42bc07cfe 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -137,7 +137,7 @@ def cleanup(self): """ if self._clone and os.path.exists(self._clone): self._clone.cleanup() - self._clone = None + self._clone = None def iter_modules(self): """ From f1c5445c51113dc4852613f2159f9de650f0f606 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 18:18:12 +0000 Subject: [PATCH 64/72] More checks on local and remote --- shpc/main/registry/__init__.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index cb2fe9968..2344097dd 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -139,16 +139,6 @@ def _sync( remote = self.get_registry( sync_registry or self.settings.sync_registry, tag=tag ) - local = self.get_registry(local) if local else self.filesystem_registry - - # We sync to our first registry - if not filesystem, no go - if not local: - logger.exit("No local registry to sync to") - if not isinstance(local, Filesystem): - logger.exit( - "sync is only supported for a remote to a filesystem registry: %s" - % local.url - ) # Upgrade the current registry from the remote self.sync_from_remote( @@ -172,23 +162,34 @@ def sync_from_remote( If the registry module is not installed, we install to the first filesystem registry found in the list. """ - updates = False + ## First get a valid local Registry # A local (string) path provided - if local and isinstance(local, str) and os.path.exists(local): + if local and isinstance(local, str): + if not os.path.exists(local): + logger.exit("The path %s doesn't exist." % local) local = Filesystem(local) # No local registry provided, use default if not local: local = self.filesystem_registry + # We sync to our first registry - if not filesystem, no go if not local: - logger.exit("No local registry to sync to") + logger.exit("No local registry to sync to. Check the shpc settings.") + + if not isinstance(local, Filesystem): + logger.exit("Can only synchronise to a local file system, not to %s." % local) + + ## Then a valid remote Registry + if not remote: + logger.exit("No remote provided. Cannot sync.") if not isinstance(remote, Filesystem): # Instantiate a local registry, which will have to be cleaned up remote = remote.clone() # These are modules to update + updates = False for module in remote.iter_modules(): if name and module != name: continue From 7bb857b37cbcfdfb157f52c5f2e434c4ce76d410 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 18:18:18 +0000 Subject: [PATCH 65/72] It makes more sense to cleanup tmplocal than self, and it works because self expects the cleanup may happen --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 42bc07cfe..178dfdb3d 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -188,7 +188,7 @@ def _update_clone_cache(self): ), "config_url": config_url, } - self.cleanup() + tmplocal.cleanup() def iter_registry(self, filter_string=None): """ From 043532b4db9d6944414258f80276f9047f3058e5 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 18:30:38 +0000 Subject: [PATCH 66/72] Moved this to the parent class --- shpc/main/registry/remote.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 178dfdb3d..62ef1f509 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -93,6 +93,13 @@ def __init__(self, source, tag=None, subdir=None): # E.g., subdirectory with registry files self.subdir = subdir + def parse_github_gitlab_repo(self): + url_path = self.parsed_url.path.lstrip("/") + if url_path.endswith(".git"): + url_path = url_path[:-4] + # tuple(owner, repo) + return url_path.split("/", 1) + @property def library_url(self): """ @@ -219,10 +226,7 @@ def matches(cls, source): @property def library_url(self): - url_path = self.parsed_url.path.lstrip("/") - if url_path.endswith(".git"): - url_path = url_path[:-4] - owner, repo = url_path.split("/", 1) + owner, repo = self.parse_github_gitlab_repo() return f"https://{owner}.github.io/{repo}/library.json" def get_container_url(self, module_name): @@ -239,10 +243,7 @@ def matches(cls, source): @property def library_url(self): - url_path = self.parsed_url.path.lstrip("/") - if url_path.endswith(".git"): - url_path = url_path[:-4] - owner, repo = url_path.split("/", 1) + owner, repo = self.parse_github_gitlab_repo() return f"https://{owner}.gitlab.io/{repo}/library.json" def get_container_url(self, module_name): From 1334b64200bfb9173cfba694249e9a82f11fca5c Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 18:37:34 +0000 Subject: [PATCH 67/72] Another implementation that doesn't make it too obvious the base-class knows about GitHub and GitLab --- shpc/main/registry/remote.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 62ef1f509..8bcf59f50 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -82,6 +82,9 @@ def __init__(self, source, tag=None, subdir=None): type(self).__name__ + "registry must be a remote path, got %s." % source ) self.url = source + # We don't want ".git" hanging around + if source.endswith(".git"): + source = source[:-4] self.parsed_url = urllib.parse.urlparse(source) self._clone = None @@ -93,13 +96,6 @@ def __init__(self, source, tag=None, subdir=None): # E.g., subdirectory with registry files self.subdir = subdir - def parse_github_gitlab_repo(self): - url_path = self.parsed_url.path.lstrip("/") - if url_path.endswith(".git"): - url_path = url_path[:-4] - # tuple(owner, repo) - return url_path.split("/", 1) - @property def library_url(self): """ @@ -226,7 +222,7 @@ def matches(cls, source): @property def library_url(self): - owner, repo = self.parse_github_gitlab_repo() + owner, repo = self.parsed_url.path.lstrip("/").split("/", 1) return f"https://{owner}.github.io/{repo}/library.json" def get_container_url(self, module_name): @@ -243,7 +239,7 @@ def matches(cls, source): @property def library_url(self): - owner, repo = self.parse_github_gitlab_repo() + owner, repo = self.parsed_url.path.lstrip("/").split("/", 1) return f"https://{owner}.gitlab.io/{repo}/library.json" def get_container_url(self, module_name): From e925aa187b8bf03cb6387f5de39503cd75bd3926 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 22:40:21 +0000 Subject: [PATCH 68/72] Silly typo: self._clone is a Filesystem object, not a string --- shpc/main/registry/remote.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 8bcf59f50..f227c4463 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -111,12 +111,15 @@ def exists(self, name): self._update_cache() return name in self._cache + def has_clone(self): + return self._clone and os.path.exists(self._clone.source): + def clone(self, tmpdir=None): """ Clone the known source URL to a temporary directory and return an equivalent local registry (Filesystem) """ - if self._clone and os.path.exists(self._clone): + if self.has_clone(): return self._clone tmpdir = tmpdir or shpc.utils.get_tmpdir() @@ -138,7 +141,7 @@ def cleanup(self): """ Cleanup the registry """ - if self._clone and os.path.exists(self._clone): + if self.has_clone(): self._clone.cleanup() self._clone = None From 7ad77299ba313f3e6c744bac0097e283dabf8be8 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 22:47:58 +0000 Subject: [PATCH 69/72] No colon --- shpc/main/registry/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index f227c4463..7416c3cfc 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -112,7 +112,7 @@ def exists(self, name): return name in self._cache def has_clone(self): - return self._clone and os.path.exists(self._clone.source): + return self._clone and os.path.exists(self._clone.source) def clone(self, tmpdir=None): """ From 455f10d4de9a6a1a012356be0ec6e4fc1201bc02 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Mon, 7 Nov 2022 23:43:54 +0000 Subject: [PATCH 70/72] You shall use American spelling Co-authored-by: Vanessasaurus <814322+vsoch@users.noreply.github.com> --- shpc/main/registry/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index 2344097dd..d7356f529 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -178,7 +178,7 @@ def sync_from_remote( logger.exit("No local registry to sync to. Check the shpc settings.") if not isinstance(local, Filesystem): - logger.exit("Can only synchronise to a local file system, not to %s." % local) + logger.exit("Can only synchronize to a local file system, not to %s." % local) ## Then a valid remote Registry if not remote: From 89975c160ff265f909fb87a022c981198cdb3dbb Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 8 Nov 2022 00:05:38 +0000 Subject: [PATCH 71/72] Added a test to showcase ssh --- shpc/tests/test_sync.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index c41d5ad13..fa95b91f4 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -73,6 +73,8 @@ def test_sync_from_file(tmp_path): % tmp_path: "https://github.com/singularityhub/shpc-registry", "%s/gitlab-shpc" % tmp_path: "https://gitlab.com/singularityhub/shpc-registry", + "%s/tmp/github-shpc-ssh" + % tmp_path: "ssh://git@github.com/singularityhub/shpc-registry.git", } } registry_config = os.path.join(tmp_path, "registries.yaml") @@ -106,6 +108,7 @@ def test_sync_from_file(tmp_path): [ "https://github.com/singularityhub/shpc-registry", "https://gitlab.com/singularityhub/shpc-registry", + "ssh://git@github.com/singularityhub/shpc-registry.git", # This registry does not expose a web UI "https://github.com/researchapps/shpc-test-registry", ], @@ -142,6 +145,7 @@ def test_remote_upgrade(tmp_path, remote): [ "https://github.com/singularityhub/shpc-registry", "https://gitlab.com/singularityhub/shpc-registry", + "ssh://git@github.com/singularityhub/shpc-registry.git", # This registry does not expose a web UI "https://github.com/researchapps/shpc-test-registry", ], From 6eaadfbac9a431cfbf402f260169843f0d431cf2 Mon Sep 17 00:00:00 2001 From: Matthieu Muffato Date: Tue, 8 Nov 2022 00:10:57 +0000 Subject: [PATCH 72/72] black --- shpc/main/registry/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index d7356f529..85270c780 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -150,7 +150,7 @@ def _sync( local=local, ) - # Cleanup the remote once we've done the sync + #  Cleanup the remote once we've done the sync remote.cleanup() def sync_from_remote( @@ -178,7 +178,9 @@ def sync_from_remote( logger.exit("No local registry to sync to. Check the shpc settings.") if not isinstance(local, Filesystem): - logger.exit("Can only synchronize to a local file system, not to %s." % local) + logger.exit( + "Can only synchronize to a local file system, not to %s." % local + ) ## Then a valid remote Registry if not remote: