From fa233865edd6d4cfc079ca4230006c794e0a415b Mon Sep 17 00:00:00 2001 From: Sergei Rybakov Date: Fri, 20 Sep 2024 20:39:11 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Allow=20to=20set=20cache=20destinat?= =?UTF-8?q?ions=20via=20cache=5Fkey=20in=20cloud=5Fto=5Flocal=20(#861)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow to set cache destinations via cache_key in cloud_to_local --- docs/hub-prod/test-cloud-sync.ipynb | 75 +++++++++++++++++++++++++ lamindb_setup/core/_settings_storage.py | 38 +++++++------ 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/docs/hub-prod/test-cloud-sync.ipynb b/docs/hub-prod/test-cloud-sync.ipynb index c3fa26a56..d7ec77597 100644 --- a/docs/hub-prod/test-cloud-sync.ipynb +++ b/docs/hub-prod/test-cloud-sync.ipynb @@ -30,6 +30,7 @@ "outputs": [], "source": [ "from lamindb_setup import init, settings\n", + "from upath import UPath\n", "import time\n", "import os" ] @@ -101,6 +102,80 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "02399123", + "metadata": {}, + "source": [ + "Test `cloud_to_local_no_update` paths" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51ce7f38", + "metadata": {}, + "outputs": [], + "source": [ + "test_local_path = UPath(\"./some/local/path\")\n", + "assert settings.storage.cloud_to_local_no_update(test_local_path) == test_local_path\n", + "assert settings.storage.cloud_to_local_no_update(test_local_path.as_posix()) == test_local_path\n", + "assert settings.storage.cloud_to_local_no_update(test_local_path, cache_key=\"some/cache/key\") == test_local_path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f264a96e", + "metadata": {}, + "outputs": [], + "source": [ + "assert settings.storage.cloud_to_local_no_update(dir_sync) == settings.storage.cache_dir / f\"lamindb-ci/{instance_name}/dir_sync\"\n", + "assert settings.storage.cloud_to_local_no_update(dir_sync, cache_key=\"dir_cache/key\") == settings.storage.cache_dir / \"dir_cache/key\"" + ] + }, + { + "cell_type": "markdown", + "id": "0b79f2f7", + "metadata": {}, + "source": [ + "Test `cloud_to_local` with `cache_key`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c406abb5", + "metadata": {}, + "outputs": [], + "source": [ + "dir_sync_local = settings.storage.cloud_to_local(dir_sync, cache_key=\"dir_cache/key\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5cf38fa", + "metadata": {}, + "outputs": [], + "source": [ + "assert dir_sync_local == settings.storage.cache_dir / \"dir_cache/key\"\n", + "assert dir_sync_local.is_dir()\n", + "assert num_files(dir_sync_local) == 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9fbf5c69", + "metadata": {}, + "outputs": [], + "source": [ + "for file in dir_sync_local.iterdir():\n", + " file.unlink()\n", + "dir_sync_local.rmdir()" + ] + }, { "cell_type": "markdown", "id": "574c3f95", diff --git a/lamindb_setup/core/_settings_storage.py b/lamindb_setup/core/_settings_storage.py index 3514396d5..74746af92 100644 --- a/lamindb_setup/core/_settings_storage.py +++ b/lamindb_setup/core/_settings_storage.py @@ -372,28 +372,34 @@ def is_on_hub(self) -> bool: else: return True - def key_to_filepath(self, filekey: Path | UPath | str) -> UPath: - """Cloud or local filepath from filekey.""" - return self.root / filekey - - def cloud_to_local(self, filepath: Path | UPath, **kwargs) -> UPath: - """Local (cache) filepath from filepath.""" - local_filepath = self.cloud_to_local_no_update(filepath) # type: ignore + def cloud_to_local( + self, filepath: UPathStr, cache_key: UPathStr | None = None, **kwargs + ) -> UPath: + """Local (or local cache) filepath from filepath.""" + # cache_key is ignored in cloud_to_local_no_update if filepath is local + local_filepath = self.cloud_to_local_no_update(filepath, cache_key) if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses): local_filepath.parent.mkdir(parents=True, exist_ok=True) filepath.synchronize(local_filepath, **kwargs) return local_filepath - # conversion to Path via cloud_to_local() would trigger download - # of remote file to cache if there already is one - # in pure write operations that update the cloud, we don't want this - # hence, we manually construct the local file path - # using the `.parts` attribute in the following line - def cloud_to_local_no_update(self, filepath: UPath) -> UPath: + def cloud_to_local_no_update( + self, filepath: UPathStr, cache_key: UPathStr | None = None + ) -> UPath: + # cache_key is ignored if filepath is local if isinstance(filepath, UPath) and not isinstance(filepath, LocalPathClasses): - return self.cache_dir.joinpath(filepath._url.netloc, *filepath.parts[1:]) # type: ignore - return filepath + # Path / UPath discards protocol from UPath if present + local_filepath = self.cache_dir / ( + filepath if cache_key is None else cache_key + ) + else: + local_filepath = filepath + return UPath(local_filepath) + + def key_to_filepath(self, filekey: UPathStr) -> UPath: + """Cloud or local filepath from filekey.""" + return self.root / filekey - def local_filepath(self, filekey: Path | UPath | str) -> UPath: + def local_filepath(self, filekey: UPathStr) -> UPath: """Local (cache) filepath from filekey: `local(filepath(...))`.""" return self.cloud_to_local(self.key_to_filepath(filekey))