diff --git a/CHANGELOG.md b/CHANGELOG.md index fb3d078f0..9ad14a5c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and **Merged pull requests**. Critical items to know are: The versions coincide with releases on pip. Only major versions will be released as tags on Github. ## [0.0.x](https://github.com/singularityhub/singularity-hpc/tree/main) (0.0.x) + - Support for remotes that do not expose library.json (0.1.12) - Update add to return container yaml (0.1.11) - Fixing bug with writing package file in update (0.1.1) - Add support for remote registry and sync commands --all (0.1.0) diff --git a/shpc/client/sync.py b/shpc/client/sync.py index 6037a9a97..98cee4a8e 100644 --- a/shpc/client/sync.py +++ b/shpc/client/sync.py @@ -2,9 +2,10 @@ __copyright__ = "Copyright 2021-2022, Vanessa Sochat" __license__ = "MPL 2.0" +import os + import shpc.logger as logger import shpc.utils -import os def sync_registry(args, parser, extra, subparser): diff --git a/shpc/main/client.py b/shpc/main/client.py index 088424c72..8537a61b7 100644 --- a/shpc/main/client.py +++ b/shpc/main/client.py @@ -124,12 +124,16 @@ def update(self, name=None, dryrun=False, filters=None): """ # No name provided == "update all" if name: - modules = [name] + # find the module in the registries. 
_load_container + # calls `container.ContainerConfig(result)` like below + configs = [self._load_container(name)] else: - modules = [x[1] for x in list(self.registry.iter_modules())] - - for module_name in modules: - config = self._load_container(module_name) + # directly iterate over the content of the registry + configs = [] + for result in self.registry.iter_registry(): + configs.append(container.ContainerConfig(result)) + # do the update + for config in configs: config.update(dryrun=dryrun, filters=filters) def test( diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index fc5f921c3..51424e499 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -15,7 +15,7 @@ import shpc.main.modules.template as templatectl import shpc.main.modules.versions as versionfile import shpc.main.modules.views as views -import shpc.main.registry as registry +import shpc.main.registry import shpc.utils as utils from shpc.logger import logger from shpc.main.client import Client as BaseClient @@ -172,7 +172,12 @@ def add(self, image, module_name=None, **kwargs): """ Add a container to the registry to enable install. """ - self.settings.ensure_filesystem_registry() + local_registry = self.registry.filesystem_registry + + if not local_registry: + logger.exit( + "This command is only supported for a filesystem registry! Add one or use --registry." + ) # Docker module name is always the same namespace as the image if image.startswith("docker"): @@ -185,7 +190,7 @@ def add(self, image, module_name=None, **kwargs): # Assume adding to default registry dest = os.path.join( - self.settings.filesystem_registry, + local_registry.source, module_name.split(":")[0], "container.yaml", ) @@ -198,7 +203,7 @@ def add(self, image, module_name=None, **kwargs): # Load config (but don't validate yet!) 
config = container.ContainerConfig( - registry.FilesystemResult(module_name, template), validate=False + shpc.main.registry.FilesystemResult(module_name, template), validate=False ) return self.container.add( module_name, image, config, container_yaml=dest, **kwargs @@ -235,18 +240,9 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url - github_url = "%s/blob/%s/%s/container.yaml" % (registry, branch, module_name) - registry_bare = registry.split(".com")[-1] - raw = ( - "https://gitlab.com/%s/-/raw/%s/%s/container.yaml" - if "gitlab" in registry - else "https://raw.githubusercontent.com/%s/%s/%s/container.yaml" - ) - raw_github_url = raw % ( - registry_bare, - branch, - module_name, - ) + remote = self.registry.get_registry(registry, tag=branch) + github_url = remote.get_container_url(module_name) + raw_github_url = remote.get_raw_container_url(module_name) # Currently one doc is rendered for all containers result = template.render( @@ -314,10 +310,9 @@ def _get_module_lookup(self, base, filename, pattern=None): A shared function to get a lookup of installed modules or registry entries """ modules = {} - for fullpath in utils.recursive_find(base, pattern): - if fullpath.endswith(filename): - module_name, version = os.path.dirname(fullpath).rsplit(os.sep, 1) - module_name = module_name.replace(base, "").strip(os.sep) + for relpath in utils.recursive_find(base, pattern): + if relpath.endswith(filename): + module_name, version = os.path.dirname(relpath).rsplit(os.sep, 1) if module_name not in modules: modules[module_name] = set() modules[module_name].add(version) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index af4d2a983..85270c780 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -23,13 +23,12 @@ def update_container_module(module, from_path, existing_path): """ if 
not os.path.exists(existing_path): shpc.utils.mkdir_p(existing_path) - for filename in shpc.utils.recursive_find(from_path): - relative_path = filename.replace(from_path, "").strip("/") + for relative_path in shpc.utils.recursive_find(from_path): to_path = os.path.join(existing_path, relative_path) if os.path.exists(to_path): shutil.rmtree(to_path) shpc.utils.mkdir_p(os.path.dirname(to_path)) - shutil.copy2(filename, to_path) + shutil.copy2(os.path.join(from_path, relative_path), to_path) class Registry: @@ -44,21 +43,29 @@ def __init__(self, settings=None): # and they must exist. self.registries = [self.get_registry(r) for r in self.settings.registry] + @property + def filesystem_registry(self): + """ + Return the first found filesystem registry. + """ + for registry in self.registries: + if isinstance(registry, Filesystem): + return registry + def exists(self, name): """ - Determine if a module name *exists* in any local registry, return path + Determine if a module name *exists* in any registry, return the first one """ for reg in self.registries: if reg.exists(name): - return os.path.join(reg.source, name) + return reg def iter_registry(self, filter_string=None): """ Iterate over all known registries defined in settings. """ for reg in self.registries: - for entry in reg.iter_registry(filter_string=filter_string): - yield entry + yield from reg.iter_registry(filter_string=filter_string) def find(self, name, path=None): """ @@ -80,11 +87,11 @@ def iter_modules(self): """ Iterate over modules found across the registry """ - for reg in self.registries: - for registry, module in reg.iter_modules(): + for registry in self.registries: + for module in registry.iter_modules(): yield registry, module - def get_registry(self, source): + def get_registry(self, source, **kwargs): """ A registry is a local or remote registry. 
@@ -92,7 +99,7 @@ def get_registry(self, source): """ for Registry in PROVIDERS: if Registry.matches(source): - return Registry(source) + return Registry(source, **kwargs) raise ValueError("No matching registry provider for %s" % source) def sync( @@ -128,20 +135,10 @@ def _sync( local=None, sync_registry=None, ): - # Registry to sync from - sync_registry = sync_registry or self.settings.sync_registry - # Create a remote registry with settings preference - Remote = GitHub if "github.com" in sync_registry else GitLab - remote = Remote(sync_registry, tag=tag) - local = self.get_registry(local or self.settings.filesystem_registry) - - # We sync to our first registry - if not filesystem, no go - if not local.is_filesystem_registry: - logger.exit( - "sync is only supported for a remote to a filesystem registry: %s" - % sync_registry.source - ) + remote = self.get_registry( + sync_registry or self.settings.sync_registry, tag=tag + ) # Upgrade the current registry from the remote self.sync_from_remote( @@ -152,6 +149,8 @@ def _sync( add_new=add_new, local=local, ) + + #  Cleanup the remote once we've done the sync remote.cleanup() def sync_from_remote( @@ -163,26 +162,41 @@ def sync_from_remote( If the registry module is not installed, we install to the first filesystem registry found in the list. """ - updates = False + ## First get a valid local Registry # A local (string) path provided - if local and isinstance(local, str) and os.path.exists(local): + if local and isinstance(local, str): + if not os.path.exists(local): + logger.exit("The path %s doesn't exist." % local) local = Filesystem(local) # No local registry provided, use default if not local: - local = Filesystem(self.settings.filesystem_registry) + local = self.filesystem_registry + # We sync to our first registry - if not filesystem, no go + if not local: + logger.exit("No local registry to sync to. 
Check the shpc settings.") + + if not isinstance(local, Filesystem): + logger.exit( + "Can only synchronize to a local file system, not to %s." % local + ) - tmpdir = remote.source - if tmpdir.startswith("http") or not os.path.exists(tmpdir): - tmpdir = remote.clone() + ## Then a valid remote Registry + if not remote: + logger.exit("No remote provided. Cannot sync.") + + if not isinstance(remote, Filesystem): + # Instantiate a local registry, which will have to be cleaned up + remote = remote.clone() # These are modules to update - for regpath, module in remote.iter_modules(): + updates = False + for module in remote.iter_modules(): if name and module != name: continue - from_path = os.path.join(regpath, module) + from_path = os.path.join(remote.source, module) existing_path = local.exists(module) # If we have an existing module and we want to replace all files diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 5c7b6f6e3..f8d46fb41 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -75,20 +75,31 @@ def override_exists(self, tag): class Filesystem(Provider): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.source = os.path.abspath(self.source) + def __init__(self, source): + if not self.matches(source): + raise ValueError( + "Filesystem registry source must exist on the filesystem. Got %s" + % source + ) + self.source = os.path.abspath(source) @classmethod def matches(cls, source): return os.path.exists(source) or source == "." 
+ def exists(self, name): + return os.path.exists(os.path.join(self.source, name)) + def iter_modules(self): + """ + yield module names + """ + # Find modules based on container.yaml for filename in shpc.utils.recursive_find(self.source, "container.yaml"): - module = os.path.dirname(filename).replace(self.source, "").strip(os.sep) + module = os.path.dirname(filename) if not module: continue - yield self.source, module + yield module def find(self, name): """ @@ -110,14 +121,9 @@ def iter_registry(self, filter_string=None): """ Iterate over content in filesystem registry. """ - for filename in shpc.utils.recursive_find(self.source): - if not filename.endswith("container.yaml"): - continue - module_name = ( - os.path.dirname(filename).replace(self.source, "").strip(os.sep) - ) - + for module_name in self.iter_modules(): # If the user has provided a filter, honor it if filter_string and not re.search(filter_string, module_name): continue + filename = os.path.join(self.source, module_name) yield FilesystemResult(module_name, filename) diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index c396c99bb..c2aa1dc6c 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -5,6 +5,8 @@ import os +import shpc.utils + class Result: @property @@ -32,36 +34,40 @@ class Provider: A general provider should retrieve and provide registry files. """ - def __init__(self, source, *args, **kwargs): - if not (source.startswith("https://") or os.path.exists(source)): - raise ValueError( - "Registry source must exist on the filesystem or be given as https://." 
- ) - self.source = source - - def exists(self, name): - return os.path.exists(os.path.join(self.source, name)) - - @property - def is_filesystem_registry(self): - return not self.source.startswith("http") and os.path.exists(self.source) - - @property - def name(self): - return self.__class__.__name__.lower() - @classmethod - def matches(cls, source_url: str): - pass + def matches(cls, source): + """ + Returns true if this class understands the source + """ + raise NotImplementedError def find(self, name): - pass + """ + Returns a Result object if the module can be found in the registry + """ + raise NotImplementedError + + def exists(self, name): + """ + Returns true if the module can be found in the registry + """ + raise NotImplementedError def cleanup(self): - pass + """ + Cleanup the registry + """ + raise NotImplementedError - def iter_registry(self): - pass + def iter_registry(self, filter_string=None): + """ + Iterates over the modules of this registry (that match the filter, if + provided) as Result instances + """ + raise NotImplementedError def iter_modules(self): - pass + """ + Iterates over the module names of this registry + """ + raise NotImplementedError diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index c06142df7..7416c3cfc 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -5,8 +5,9 @@ import os import re +import shutil import subprocess as sp -import sys +import urllib import requests @@ -14,6 +15,7 @@ from shpc.logger import logger from .provider import Provider, Result +from .filesystem import Filesystem class RemoteResult(Result): @@ -74,85 +76,81 @@ def override_exists(self, tag): class VersionControl(Provider): - def __init__(self, *args, **kwargs): - self.tag = kwargs.get("tag") + def __init__(self, source, tag=None, subdir=None): + if not self.matches(source): + raise ValueError( + type(self).__name__ + "registry must be a remote path, got %s." 
% source + ) + self.url = source + # We don't want ".git" hanging around + if source.endswith(".git"): + source = source[:-4] + self.parsed_url = urllib.parse.urlparse(source) + self._clone = None + + self.tag = tag # Cache of remote container metadata self._cache = {} # E.g., subdirectory with registry files - self.subdir = kwargs.get("subdir") - super().__init__(*args, **kwargs) - self._url = self.source - - @classmethod - def matches(cls, source): - return cls.provider_name in source and source.startswith("http") + self.subdir = subdir @property - def source_url(self): + def library_url(self): """ - Retrieve a parsed / formatted url, ensuring https and without git. + Retrieve the URL of this registry's library (in JSON). """ - url = self.source - if not url.startswith("http"): - url = "https://%s" % url - if url.endswith(".git"): - url = url[:-4] - return url - - @property - def web_url(self): - """ - Retrieve the web url, either pages or (eventually) custom. - """ - parts = self.source_url.split("/")[3:] - return "https://%s.%s.io/%s/library.json" % ( - parts[0], - self.provider_name, - "/".join(parts[1:]), - ) + raise NotImplementedError def exists(self, name): """ Determine if a module exists in the registry. 
""" - dirname = self.source - if self.subdir: - dirname = os.path.join(dirname, self.subdir) - return os.path.exists(os.path.join(dirname, name)) + name = name.split(":")[0] + self._update_cache() + return name in self._cache + + def has_clone(self): + return self._clone and os.path.exists(self._clone.source) def clone(self, tmpdir=None): """ Clone the known source URL to a temporary directory + and return an equivalent local registry (Filesystem) """ + if self.has_clone(): + return self._clone tmpdir = tmpdir or shpc.utils.get_tmpdir() cmd = ["git", "clone", "--depth", "1"] if self.tag: cmd += ["-b", self.tag] - cmd += [self._url, tmpdir] - self.source = tmpdir + cmd += [self.url, tmpdir] + if self.subdir: + tmpdir = os.path.join(tmpdir, self.subdir) try: sp.run(cmd, check=True) except sp.CalledProcessError as e: - raise ValueError("Failed to clone repository {}:\n{}", self.source, e) - return tmpdir + raise ValueError("Failed to clone repository {}:\n{}", self.url, e) + assert os.path.exists(tmpdir) + self._clone = Filesystem(tmpdir) + return self._clone + + def cleanup(self): + """ + Cleanup the registry + """ + if self.has_clone(): + self._clone.cleanup() + self._clone = None def iter_modules(self): """ yield module names """ - dirname = self.source - if self.subdir: - dirname = os.path.join(dirname, self.subdir) - - # Find modules based on container.yaml - for filename in shpc.utils.recursive_find(dirname, "container.yaml"): - module = os.path.dirname(filename).replace(dirname, "").strip(os.sep) - if not module: - continue - yield dirname, module + self._update_cache() + yield from self._cache.keys() def find(self, name): """ @@ -169,15 +167,35 @@ def _update_cache(self, force=False): if self._cache and not force: return + library_url = self.library_url + if not library_url: + return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages - response = requests.get(self.web_url) + response = requests.get(library_url) if 
response.status_code != 200: - sys.exit( - "Remote %s is not deploying a Registry API (%s). Open a GitHub issue to ask for help." - % (self.source, self.web_url) - ) + return self._update_clone_cache() self._cache = response.json() + def _update_clone_cache(self): + """ + Given a remote that does not expose a library.json, handle via clone. + """ + logger.warning( + "Remote %s is not deploying a Registry API, falling back to clone." + % self.url + ) + tmplocal = self.clone() + for module in tmplocal.iter_modules(): + # Minimum amount of metadata to function here + config_url = self.get_raw_container_url(module) + self._cache[module] = { + "config": shpc.utils.read_yaml( + os.path.join(tmplocal.source, module, "container.yaml") + ), + "config_url": config_url, + } + tmplocal.cleanup() + def iter_registry(self, filter_string=None): """ Yield metadata about containers in a remote registry. @@ -193,10 +211,42 @@ def iter_registry(self, filter_string=None): # Assemble a faux config with tags so we don't hit remote yield RemoteResult(uri, entry, load=False, config=entry["config"]) + def get_container_url(self, module_name): + raise NotImplementedError + + def get_raw_container_url(self, module_name): + raise NotImplementedError + class GitHub(VersionControl): - provider_name = "github" + @classmethod + def matches(cls, source): + return urllib.parse.urlparse(source).hostname == "github.com" + + @property + def library_url(self): + owner, repo = self.parsed_url.path.lstrip("/").split("/", 1) + return f"https://{owner}.github.io/{repo}/library.json" + + def get_container_url(self, module_name): + return f"https://github.com/{self.parsed_url.path}/blob/{self.tag}/{module_name}/container.yaml" + + def get_raw_container_url(self, module_name): + return f"https://raw.githubusercontent.com/{self.parsed_url.path}/{self.tag}/{module_name}/container.yaml" class GitLab(VersionControl): - provider_name = "gitlab" + @classmethod + def matches(cls, source): + return 
urllib.parse.urlparse(source).hostname == "gitlab.com" + + @property + def library_url(self): + owner, repo = self.parsed_url.path.lstrip("/").split("/", 1) + return f"https://{owner}.gitlab.io/{repo}/library.json" + + def get_container_url(self, module_name): + return f"https://gitlab.com/{self.parsed_url.path}/-/blob/{self.tag}/{module_name}/container.yaml" + + def get_raw_container_url(self, module_name): + return f"https://gitlab.com/{self.parsed_url.path}/-/raw/{self.tag}/{module_name}/container.yaml" diff --git a/shpc/main/settings.py b/shpc/main/settings.py index b7d6ee401..892024866 100644 --- a/shpc/main/settings.py +++ b/shpc/main/settings.py @@ -241,32 +241,6 @@ def change_validate(self, key, value): "%s:%s cannot be added to config: %s" % (key, value, error.message) ) - @property - def filesystem_registry(self): - """ - Return the first found filesystem registry - """ - for path in self.registry: - if path.startswith("http") or not os.path.exists(path): - continue - return path - - def ensure_filesystem_registry(self): - """ - Ensure that the settings has a filesystem registry. - """ - found = False - for path in self.registry: - if path.startswith("http") or not os.path.exists(path): - continue - found = True - - # Cut out early if registry isn't on the filesystem - if not found: - logger.exit( - "This command is only supported for a filesystem registry! Add one or use --registry." 
- ) - def _substitutions(self, value): """ Given a value, make substitutions diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index 1de5b74ad..fa95b91f4 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -33,12 +33,12 @@ def test_filesystem_upgrade(tmp_path): os.makedirs(registry_path) client.reload_registry() - assert client.settings.filesystem_registry == registry_path + local = client.registry.filesystem_registry + assert local + assert isinstance(local, registry.Filesystem) + assert local.source == registry_path - # Test interacting with local filesystem registry - local = registry.Filesystem(client.settings.filesystem_registry) - - # It should be empty + # Local filesystem registry should be empty assert not list(local.iter_modules()) # Create filesystem registry with test data @@ -49,8 +49,7 @@ def test_filesystem_upgrade(tmp_path): # Should have one module installed mods = list(test_registry.iter_modules()) assert len(mods) == 1 - module = mods[0][1] - assert mods[0][0] == test_registry_path + module = mods[0] assert module == "dinosaur/salad" # Upgrade the current registry from the "remote" (test registry) @@ -58,7 +57,8 @@ def test_filesystem_upgrade(tmp_path): client.registry.sync_from_remote(test_registry, module) existing = client.registry.exists(module) assert existing is not None - assert os.path.exists(existing) + assert existing == local + assert os.path.exists(os.path.join(local.source, module)) def test_sync_from_file(tmp_path): @@ -73,6 +73,8 @@ def test_sync_from_file(tmp_path): % tmp_path: "https://github.com/singularityhub/shpc-registry", "%s/gitlab-shpc" % tmp_path: "https://gitlab.com/singularityhub/shpc-registry", + "%s/tmp/github-shpc-ssh" + % tmp_path: "ssh://git@github.com/singularityhub/shpc-registry.git", } } registry_config = os.path.join(tmp_path, "registries.yaml") @@ -106,6 +108,9 @@ def test_sync_from_file(tmp_path): [ "https://github.com/singularityhub/shpc-registry", 
"https://gitlab.com/singularityhub/shpc-registry", + "ssh://git@github.com/singularityhub/shpc-registry.git", + # This registry does not expose a web UI + "https://github.com/researchapps/shpc-test-registry", ], ) def test_remote_upgrade(tmp_path, remote): @@ -133,3 +138,31 @@ def test_remote_upgrade(tmp_path, remote): client.registry.sync(sync_registry=remote) assert list(client.registry.iter_modules()) + + +@pytest.mark.parametrize( + "remote", + [ + "https://github.com/singularityhub/shpc-registry", + "https://gitlab.com/singularityhub/shpc-registry", + "ssh://git@github.com/singularityhub/shpc-registry.git", + # This registry does not expose a web UI + "https://github.com/researchapps/shpc-test-registry", + ], +) +def test_registry_interaction(tmp_path, remote): + """ + Test interactions with registries of different types + """ + client = init_client(str(tmp_path), "lmod", "singularity") + reg = client.registry.get_registry(remote) + + assert not isinstance(reg, registry.Filesystem) + + # This will hit the underlying logic to list/show + mods = list(reg.iter_registry()) + assert mods + + # Should use the cache + assert reg.exists("vanessa/salad") + assert reg.find("vanessa/salad") is not None diff --git a/shpc/utils/fileio.py b/shpc/utils/fileio.py index a1c4d23dd..20dae1b80 100644 --- a/shpc/utils/fileio.py +++ b/shpc/utils/fileio.py @@ -129,12 +129,14 @@ def recursive_find(base, pattern=None): """ # We can identify modules by finding module.lua for root, folders, files in os.walk(base): + assert root.startswith(base) + subdir = root[len(base) + 1 :] for file in files: - fullpath = os.path.abspath(os.path.join(root, file)) + relpath = os.path.join(subdir, file) - if pattern and not re.search(pattern, fullpath): + if pattern and not re.search(pattern, relpath): continue - yield fullpath + yield relpath def get_file_hash(image_path, algorithm="sha256"): diff --git a/shpc/version.py b/shpc/version.py index a22ef5373..e295dd9c9 100644 --- a/shpc/version.py +++ 
b/shpc/version.py @@ -2,7 +2,7 @@ __copyright__ = "Copyright 2021-2022, Vanessa Sochat" __license__ = "MPL 2.0" -__version__ = "0.1.11" +__version__ = "0.1.12" AUTHOR = "Vanessa Sochat" EMAIL = "vsoch@users.noreply.github.com" NAME = "singularity-hpc"