diff --git a/shpc/main/client.py b/shpc/main/client.py index 088424c72..8537a61b7 100644 --- a/shpc/main/client.py +++ b/shpc/main/client.py @@ -124,12 +124,16 @@ def update(self, name=None, dryrun=False, filters=None): """ # No name provided == "update all" if name: - modules = [name] + # find the module in the registries. _load_container + # calls `container.ContainerConfig(result)` like below + configs = [self._load_container(name)] else: - modules = [x[1] for x in list(self.registry.iter_modules())] - - for module_name in modules: - config = self._load_container(module_name) + # directly iterate over the content of the registry + configs = [] + for result in self.registry.iter_registry(): + configs.append(container.ContainerConfig(result)) + # do the update + for config in configs: config.update(dryrun=dryrun, filters=filters) def test( diff --git a/shpc/main/modules/base.py b/shpc/main/modules/base.py index 435a14c02..51424e499 100644 --- a/shpc/main/modules/base.py +++ b/shpc/main/modules/base.py @@ -172,7 +172,12 @@ def add(self, image, module_name=None, **kwargs): """ Add a container to the registry to enable install. """ - self.settings.ensure_filesystem_registry() + local_registry = self.registry.filesystem_registry + + if not local_registry: + logger.exit( + "This command is only supported for a filesystem registry! Add one or use --registry." 
+ ) # Docker module name is always the same namespace as the image if image.startswith("docker"): @@ -185,7 +190,7 @@ def add(self, image, module_name=None, **kwargs): # Assume adding to default registry dest = os.path.join( - self.settings.filesystem_registry, + local_registry.source, module_name.split(":")[0], "container.yaml", ) @@ -235,10 +240,9 @@ def docgen(self, module_name, registry=None, out=None, branch="main"): aliases = config.get_aliases() template = self.template.load("docs.md") registry = registry or defaults.github_url - github_url = "%s/blob/%s/%s/container.yaml" % (registry, branch, module_name) - raw_github_url = shpc.main.registry.get_module_config_url( - registry, module_name, branch - ) + remote = self.registry.get_registry(registry, tag=branch) + github_url = remote.get_container_url(module_name) + raw_github_url = remote.get_raw_container_url(module_name) # Currently one doc is rendered for all containers result = template.render( @@ -306,10 +310,9 @@ def _get_module_lookup(self, base, filename, pattern=None): A shared function to get a lookup of installed modules or registry entries """ modules = {} - for fullpath in utils.recursive_find(base, pattern): - if fullpath.endswith(filename): - module_name, version = os.path.dirname(fullpath).rsplit(os.sep, 1) - module_name = module_name.replace(base, "").strip(os.sep) + for relpath in utils.recursive_find(base, pattern): + if relpath.endswith(filename): + module_name, version = os.path.dirname(relpath).rsplit(os.sep, 1) if module_name not in modules: modules[module_name] = set() modules[module_name].add(version) diff --git a/shpc/main/registry/__init__.py b/shpc/main/registry/__init__.py index f98185c68..85270c780 100644 --- a/shpc/main/registry/__init__.py +++ b/shpc/main/registry/__init__.py @@ -14,7 +14,7 @@ from shpc.main.settings import SettingsBase from .filesystem import Filesystem, FilesystemResult -from .remote import GitHub, GitLab, get_module_config_url +from .remote import GitHub, 
GitLab def update_container_module(module, from_path, existing_path): @@ -23,13 +23,12 @@ def update_container_module(module, from_path, existing_path): """ if not os.path.exists(existing_path): shpc.utils.mkdir_p(existing_path) - for filename in shpc.utils.recursive_find(from_path): - relative_path = filename.replace(from_path, "").strip("/") + for relative_path in shpc.utils.recursive_find(from_path): to_path = os.path.join(existing_path, relative_path) if os.path.exists(to_path): shutil.rmtree(to_path) shpc.utils.mkdir_p(os.path.dirname(to_path)) - shutil.copy2(filename, to_path) + shutil.copy2(os.path.join(from_path, relative_path), to_path) class Registry: @@ -44,21 +43,29 @@ def __init__(self, settings=None): # and they must exist. self.registries = [self.get_registry(r) for r in self.settings.registry] + @property + def filesystem_registry(self): + """ + Return the first found filesystem registry. + """ + for registry in self.registries: + if isinstance(registry, Filesystem): + return registry + def exists(self, name): """ - Determine if a module name *exists* in any local registry, return path + Determine if a module name *exists* in any registry, return the first one """ for reg in self.registries: if reg.exists(name): - return os.path.join(reg.source, name) + return reg def iter_registry(self, filter_string=None): """ Iterate over all known registries defined in settings. 
""" for reg in self.registries: - for entry in reg.iter_registry(filter_string=filter_string): - yield entry + yield from reg.iter_registry(filter_string=filter_string) def find(self, name, path=None): """ @@ -80,11 +87,11 @@ def iter_modules(self): """ Iterate over modules found across the registry """ - for reg in self.registries: - for registry, module in reg.iter_modules(): + for registry in self.registries: + for module in registry.iter_modules(): yield registry, module - def get_registry(self, source): + def get_registry(self, source, **kwargs): """ A registry is a local or remote registry. @@ -92,7 +99,7 @@ def get_registry(self, source): """ for Registry in PROVIDERS: if Registry.matches(source): - return Registry(source) + return Registry(source, **kwargs) raise ValueError("No matching registry provider for %s" % source) def sync( @@ -128,20 +135,10 @@ def _sync( local=None, sync_registry=None, ): - # Registry to sync from - sync_registry = sync_registry or self.settings.sync_registry - # Create a remote registry with settings preference - Remote = GitHub if "github.com" in sync_registry else GitLab - remote = Remote(sync_registry, tag=tag) - local = self.get_registry(local or self.settings.filesystem_registry) - - # We sync to our first registry - if not filesystem, no go - if not local.is_filesystem_registry: - logger.exit( - "sync is only supported for a remote to a filesystem registry: %s" - % sync_registry.source - ) + remote = self.get_registry( + sync_registry or self.settings.sync_registry, tag=tag + ) # Upgrade the current registry from the remote self.sync_from_remote( @@ -152,6 +149,8 @@ def _sync( add_new=add_new, local=local, ) + + #  Cleanup the remote once we've done the sync remote.cleanup() def sync_from_remote( @@ -163,26 +162,41 @@ def sync_from_remote( If the registry module is not installed, we install to the first filesystem registry found in the list. 
""" - updates = False + ## First get a valid local Registry # A local (string) path provided - if local and isinstance(local, str) and os.path.exists(local): + if local and isinstance(local, str): + if not os.path.exists(local): + logger.exit("The path %s doesn't exist." % local) local = Filesystem(local) # No local registry provided, use default if not local: - local = Filesystem(self.settings.filesystem_registry) + local = self.filesystem_registry + # We sync to our first registry - if not filesystem, no go + if not local: + logger.exit("No local registry to sync to. Check the shpc settings.") + + if not isinstance(local, Filesystem): + logger.exit( + "Can only synchronize to a local file system, not to %s." % local + ) - tmpdir = remote.source - if tmpdir.startswith("http") or not os.path.exists(tmpdir): - tmpdir = remote.clone() + ## Then a valid remote Registry + if not remote: + logger.exit("No remote provided. Cannot sync.") + + if not isinstance(remote, Filesystem): + # Instantiate a local registry, which will have to be cleaned up + remote = remote.clone() # These are modules to update - for regpath, module in remote.iter_modules(): + updates = False + for module in remote.iter_modules(): if name and module != name: continue - from_path = os.path.join(regpath, module) + from_path = os.path.join(remote.source, module) existing_path = local.exists(module) # If we have an existing module and we want to replace all files diff --git a/shpc/main/registry/filesystem.py b/shpc/main/registry/filesystem.py index 5c7b6f6e3..f8d46fb41 100644 --- a/shpc/main/registry/filesystem.py +++ b/shpc/main/registry/filesystem.py @@ -75,20 +75,31 @@ def override_exists(self, tag): class Filesystem(Provider): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.source = os.path.abspath(self.source) + def __init__(self, source): + if not self.matches(source): + raise ValueError( + "Filesystem registry source must exist on the filesystem. 
Got %s" + % source + ) + self.source = os.path.abspath(source) @classmethod def matches(cls, source): return os.path.exists(source) or source == "." + def exists(self, name): + return os.path.exists(os.path.join(self.source, name)) + def iter_modules(self): + """ + yield module names + """ + # Find modules based on container.yaml for filename in shpc.utils.recursive_find(self.source, "container.yaml"): - module = os.path.dirname(filename).replace(self.source, "").strip(os.sep) + module = os.path.dirname(filename) if not module: continue - yield self.source, module + yield module def find(self, name): """ @@ -110,14 +121,9 @@ def iter_registry(self, filter_string=None): """ Iterate over content in filesystem registry. """ - for filename in shpc.utils.recursive_find(self.source): - if not filename.endswith("container.yaml"): - continue - module_name = ( - os.path.dirname(filename).replace(self.source, "").strip(os.sep) - ) - + for module_name in self.iter_modules(): # If the user has provided a filter, honor it if filter_string and not re.search(filter_string, module_name): continue + filename = os.path.join(self.source, module_name) yield FilesystemResult(module_name, filename) diff --git a/shpc/main/registry/provider.py b/shpc/main/registry/provider.py index c396c99bb..c2aa1dc6c 100644 --- a/shpc/main/registry/provider.py +++ b/shpc/main/registry/provider.py @@ -5,6 +5,8 @@ import os +import shpc.utils + class Result: @property @@ -32,36 +34,40 @@ class Provider: A general provider should retrieve and provide registry files. """ - def __init__(self, source, *args, **kwargs): - if not (source.startswith("https://") or os.path.exists(source)): - raise ValueError( - "Registry source must exist on the filesystem or be given as https://." 
- ) - self.source = source - - def exists(self, name): - return os.path.exists(os.path.join(self.source, name)) - - @property - def is_filesystem_registry(self): - return not self.source.startswith("http") and os.path.exists(self.source) - - @property - def name(self): - return self.__class__.__name__.lower() - @classmethod - def matches(cls, source_url: str): - pass + def matches(cls, source): + """ + Returns true if this class understands the source + """ + raise NotImplementedError def find(self, name): - pass + """ + Returns a Result object if the module can be found in the registry + """ + raise NotImplementedError + + def exists(self, name): + """ + Returns true if the module can be found in the registry + """ + raise NotImplementedError def cleanup(self): - pass + """ + Cleanup the registry + """ + raise NotImplementedError - def iter_registry(self): - pass + def iter_registry(self, filter_string=None): + """ + Iterates over the modules of this registry (that match the filter, if + provided) as Result instances + """ + raise NotImplementedError def iter_modules(self): - pass + """ + Iterates over the module names of this registry + """ + raise NotImplementedError diff --git a/shpc/main/registry/remote.py b/shpc/main/registry/remote.py index 6053a079d..7416c3cfc 100644 --- a/shpc/main/registry/remote.py +++ b/shpc/main/registry/remote.py @@ -7,6 +7,7 @@ import re import shutil import subprocess as sp +import urllib.parse import requests @@ -14,23 +15,7 @@ from shpc.logger import logger from .provider import Provider, Result - - -def get_module_config_url(registry, module_name, branch="main"): - """ - Get the raw address of the config (container.yaml) - """ - registry_bare = registry.split(".com")[-1] - raw = ( - "https://gitlab.com/%s/-/raw/%s/%s/container.yaml" - if "gitlab" in registry - else "https://raw.githubusercontent.com/%s/%s/%s/container.yaml" - ) - return raw % ( - registry_bare, - branch, - module_name, - ) +from .filesystem import Filesystem class
RemoteResult(Result): @@ -91,95 +76,87 @@ def override_exists(self, tag): class VersionControl(Provider): - def __init__(self, *args, **kwargs): - self.tag = kwargs.get("tag") + def __init__(self, source, tag=None, subdir=None): + if not self.matches(source): + raise ValueError( + type(self).__name__ + " registry must be a remote path, got %s." % source + ) + self.url = source + # We don't want ".git" hanging around + if source.endswith(".git"): + source = source[:-4] + self.parsed_url = urllib.parse.urlparse(source) + self._clone = None + + self.tag = tag # Cache of remote container metadata self._cache = {} # E.g., subdirectory with registry files - self.subdir = kwargs.get("subdir") - super().__init__(*args, **kwargs) - self._url = self.source - - @classmethod - def matches(cls, source): - return cls.provider_name in source and source.startswith("http") - - @property - def source_url(self): - """ - Retrieve a parsed / formatted url, ensuring https and without git. - """ - url = self.source - if not url.startswith("http"): - url = "https://%s" % url - if url.endswith(".git"): - url = url[:-4] - return url + self.subdir = subdir @property - def web_url(self): + def library_url(self): """ - Retrieve the web url, either pages or (eventually) custom. + Retrieve the URL of this registry's library (in JSON). """ - parts = self.source_url.split("/")[3:] - return "https://%s.%s.io/%s/library.json" % ( - parts[0], - self.provider_name, - "/".join(parts[1:]), - ) + raise NotImplementedError def exists(self, name): """ Determine if a module exists in the registry.
""" name = name.split(":")[0] - if self._cache and name in self._cache: - return True - dirname = self.source - if self.subdir: - dirname = os.path.join(dirname, self.subdir) - return os.path.exists(os.path.join(dirname, name)) + self._update_cache() + return name in self._cache + + def has_clone(self): + return self._clone and os.path.exists(self._clone.source) def clone(self, tmpdir=None): """ Clone the known source URL to a temporary directory + and return an equivalent local registry (Filesystem) """ + if self.has_clone(): + return self._clone tmpdir = tmpdir or shpc.utils.get_tmpdir() cmd = ["git", "clone", "--depth", "1"] if self.tag: cmd += ["-b", self.tag] - cmd += [self._url, tmpdir] - self.source = tmpdir + cmd += [self.url, tmpdir] + if self.subdir: + tmpdir = os.path.join(tmpdir, self.subdir) try: sp.run(cmd, check=True) except sp.CalledProcessError as e: - raise ValueError("Failed to clone repository {}:\n{}", self.source, e) - return tmpdir + raise ValueError("Failed to clone repository {}:\n{}".format(self.url, e)) from e + assert os.path.exists(tmpdir) + self._clone = Filesystem(tmpdir) + return self._clone + + def cleanup(self): + """ + Cleanup the registry + """ + if self.has_clone(): + self._clone.cleanup() + self._clone = None def iter_modules(self): """ yield module names """ - dirname = self.source - if self.subdir: - dirname = os.path.join(dirname, self.subdir) - - # Find modules based on container.yaml - for filename in shpc.utils.recursive_find(dirname, "container.yaml"): - module = os.path.dirname(filename).replace(dirname, "").strip(os.sep) - if not module: - continue - yield dirname, module + self._update_cache() + yield from self._cache.keys() def find(self, name): """ Find a particular entry in a registry """ - if not self._cache: - self._update_cache() + self._update_cache() if name in self._cache: return RemoteResult(name, self._cache[name]) @@ -190,8 +167,11 @@ def _update_cache(self, force=False): if self._cache and not force: 
return +
library_url = self.library_url + if not library_url: + return self._update_clone_cache() # Check for exposed library API on GitHub or GitLab pages - response = requests.get(self.web_url) + response = requests.get(library_url) if response.status_code != 200: return self._update_clone_cache() self._cache = response.json() @@ -202,19 +182,19 @@ def _update_clone_cache(self): """ logger.warning( "Remote %s is not deploying a Registry API, falling back to clone." - % self.source + % self.url ) - tmpdir = self.clone() - for dirname, module in self.iter_modules(): + tmplocal = self.clone() + for module in tmplocal.iter_modules(): # Minimum amount of metadata to function here - config_url = get_module_config_url(self.source, module) + config_url = self.get_raw_container_url(module) self._cache[module] = { "config": shpc.utils.read_yaml( - os.path.join(dirname, module, "container.yaml") + os.path.join(tmplocal.source, module, "container.yaml") ), "config_url": config_url, } - shutil.rmtree(tmpdir) + tmplocal.cleanup() def iter_registry(self, filter_string=None): """ @@ -231,10 +211,42 @@ def iter_registry(self, filter_string=None): # Assemble a faux config with tags so we don't hit remote yield RemoteResult(uri, entry, load=False, config=entry["config"]) + def get_container_url(self, module_name): + raise NotImplementedError + + def get_raw_container_url(self, module_name): + raise NotImplementedError + class GitHub(VersionControl): - provider_name = "github" + @classmethod + def matches(cls, source): + return urllib.parse.urlparse(source).hostname == "github.com" + + @property + def library_url(self): + owner, repo = self.parsed_url.path.lstrip("/").split("/", 1) + return f"https://{owner}.github.io/{repo}/library.json" + + def get_container_url(self, module_name): + return f"https://github.com/{self.parsed_url.path.strip('/')}/blob/{self.tag}/{module_name}/container.yaml" + + def get_raw_container_url(self, module_name): + return
f"https://raw.githubusercontent.com/{self.parsed_url.path.strip('/')}/{self.tag}/{module_name}/container.yaml" class GitLab(VersionControl): - provider_name = "gitlab" + @classmethod + def matches(cls, source): + return urllib.parse.urlparse(source).hostname == "gitlab.com" + + @property + def library_url(self): + owner, repo = self.parsed_url.path.lstrip("/").split("/", 1) + return f"https://{owner}.gitlab.io/{repo}/library.json" + + def get_container_url(self, module_name): + return f"https://gitlab.com/{self.parsed_url.path.strip('/')}/-/blob/{self.tag}/{module_name}/container.yaml" + + def get_raw_container_url(self, module_name): + return f"https://gitlab.com/{self.parsed_url.path.strip('/')}/-/raw/{self.tag}/{module_name}/container.yaml" diff --git a/shpc/main/settings.py b/shpc/main/settings.py index b7d6ee401..892024866 100644 --- a/shpc/main/settings.py +++ b/shpc/main/settings.py @@ -241,32 +241,6 @@ def change_validate(self, key, value): "%s:%s cannot be added to config: %s" % (key, value, error.message) ) - @property - def filesystem_registry(self): - """ - Return the first found filesystem registry - """ - for path in self.registry: - if path.startswith("http") or not os.path.exists(path): - continue - return path - - def ensure_filesystem_registry(self): - """ - Ensure that the settings has a filesystem registry. - """ - found = False - for path in self.registry: - if path.startswith("http") or not os.path.exists(path): - continue - found = True - - # Cut out early if registry isn't on the filesystem - if not found: - logger.exit( - "This command is only supported for a filesystem registry! Add one or use --registry."
- ) def _substitutions(self, value): """ Given a value, make substitutions diff --git a/shpc/tests/test_sync.py b/shpc/tests/test_sync.py index 7008a8e26..fa95b91f4 100644 --- a/shpc/tests/test_sync.py +++ b/shpc/tests/test_sync.py @@ -33,12 +33,12 @@ def test_filesystem_upgrade(tmp_path): os.makedirs(registry_path) client.reload_registry() - assert client.settings.filesystem_registry == registry_path + local = client.registry.filesystem_registry + assert local + assert isinstance(local, registry.Filesystem) + assert local.source == registry_path - # Test interacting with local filesystem registry - local = registry.Filesystem(client.settings.filesystem_registry) - - # It should be empty + # Local filesystem registry should be empty assert not list(local.iter_modules()) # Create filesystem registry with test data @@ -49,8 +49,7 @@ def test_filesystem_upgrade(tmp_path): # Should have one module installed mods = list(test_registry.iter_modules()) assert len(mods) == 1 - module = mods[0][1] - assert mods[0][0] == test_registry_path + module = mods[0] assert module == "dinosaur/salad" # Upgrade the current registry from the "remote" (test registry) @@ -58,7 +57,8 @@ def test_filesystem_upgrade(tmp_path): client.registry.sync_from_remote(test_registry, module) existing = client.registry.exists(module) assert existing is not None - assert os.path.exists(existing) + assert existing == local + assert os.path.exists(os.path.join(local.source, module)) def test_sync_from_file(tmp_path): @@ -73,6 +73,8 @@ def test_sync_from_file(tmp_path): % tmp_path: "https://github.com/singularityhub/shpc-registry", "%s/gitlab-shpc" % tmp_path: "https://gitlab.com/singularityhub/shpc-registry", + "%s/tmp/github-shpc-ssh" + % tmp_path: "ssh://git@github.com/singularityhub/shpc-registry.git", } } registry_config = os.path.join(tmp_path, "registries.yaml") @@ -106,6 +108,7 @@ def test_sync_from_file(tmp_path): [ "https://github.com/singularityhub/shpc-registry",
"https://gitlab.com/singularityhub/shpc-registry", + "ssh://git@github.com/singularityhub/shpc-registry.git", # This registry does not expose a web UI "https://github.com/researchapps/shpc-test-registry", ], @@ -142,6 +145,7 @@ def test_remote_upgrade(tmp_path, remote): [ "https://github.com/singularityhub/shpc-registry", "https://gitlab.com/singularityhub/shpc-registry", + "ssh://git@github.com/singularityhub/shpc-registry.git", # This registry does not expose a web UI "https://github.com/researchapps/shpc-test-registry", ], @@ -153,7 +157,7 @@ def test_registry_interaction(tmp_path, remote): client = init_client(str(tmp_path), "lmod", "singularity") reg = client.registry.get_registry(remote) - assert not reg.is_filesystem_registry + assert not isinstance(reg, registry.Filesystem) # This will hit the underlying logic to list/show mods = list(reg.iter_registry()) diff --git a/shpc/utils/fileio.py b/shpc/utils/fileio.py index a1c4d23dd..20dae1b80 100644 --- a/shpc/utils/fileio.py +++ b/shpc/utils/fileio.py @@ -129,12 +129,14 @@ def recursive_find(base, pattern=None): """ # We can identify modules by finding module.lua for root, folders, files in os.walk(base): + # relpath copes with a trailing separator on base (slicing by len(base) did not) + subdir = os.path.relpath(root, base) for file in files: - fullpath = os.path.abspath(os.path.join(root, file)) + relpath = os.path.normpath(os.path.join(subdir, file)) - if pattern and not re.search(pattern, fullpath): + if pattern and not re.search(pattern, relpath): continue - yield fullpath + yield relpath def get_file_hash(image_path, algorithm="sha256"):