From ca49db629343a2d16c16aeeaeb9e87d8bab26ce7 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 21 Jan 2020 08:17:03 -0500 Subject: [PATCH 01/12] bump dev version --- changelog.md | 3 +++ refgenieserver/_version.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 82b6cb5..8f81386 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.4.4] -- unreleased + + ## [0.4.3] -- 2020-01-16 ### Added - a possibility to decouple genome archive directory and genome archive config file. `refgenieserver archive` uses new key (`genome_archive_config`) from `refgenconf` diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index f6b7e26..9320c61 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1 @@ -__version__ = "0.4.3" +__version__ = "0.4.4-dev" From 9f8b3481ae7842e134a1c83ae4209a19a5a7a4ae Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 21 Jan 2020 08:19:13 -0500 Subject: [PATCH 02/12] dont save genome attrs to disk, wait for assets --- refgenieserver/server_builder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 71ef1d6..4995c65 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -101,8 +101,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): genome_checksum = rgc[CFG_GENOMES_KEY][genome].setdefault(CFG_CHECKSUM_KEY, CHECKSUM_PLACEHOLDER) genome_attrs = {CFG_GENOME_DESC_KEY: genome_desc, CFG_CHECKSUM_KEY: genome_checksum} - with rgc_server as r: - r.update_genomes(genome, genome_attrs) + rgc_server.update_genomes(genome, genome_attrs) _LOGGER.debug("Updating '{}' genome attributes...".format(genome)) asset = asset_list[counter] if 
asset_list is not None else None assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() From d28a4a34161ec3ae182b0b7e67ec9a7481a94c82 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 22 Jan 2020 07:54:49 -0500 Subject: [PATCH 03/12] tweak messages --- refgenieserver/server_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 4995c65..c51c738 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -143,7 +143,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): asset_digest = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][CFG_ASSET_TAGS_KEY][tag_name]. \ setdefault(CFG_ASSET_CHECKSUM_KEY, None) if not os.path.exists(target_file) or force: - _LOGGER.info("Creating asset '{}' from '{}'".format(target_file, input_file)) + _LOGGER.info("Creating archive '{}' from '{}' asset".format(target_file, input_file)) try: _check_tgz(input_file, target_file, asset_name) _copy_recipe(input_file, target_dir, asset_name, tag_name) @@ -165,7 +165,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): with rgc_server as r: for parent in parents: # here we update any pre-existing parents' children attr with the newly added asset - _LOGGER.debug("updating {} children list with {}". + _LOGGER.debug("Updating {} children list with {}". 
format(parent, "{}/{}:{}".format(genome, asset_name, tag_name))) rp = parse_registry_path(parent) parent_genome = rp["namespace"] From 4b428e5542656dc281e25f6e788ad9d75725e2d6 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 22 Jan 2020 09:26:34 -0500 Subject: [PATCH 04/12] fix no genome attrs saving and setdefault problems --- refgenieserver/server_builder.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index c51c738..128d00d 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -101,7 +101,12 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): genome_checksum = rgc[CFG_GENOMES_KEY][genome].setdefault(CFG_CHECKSUM_KEY, CHECKSUM_PLACEHOLDER) genome_attrs = {CFG_GENOME_DESC_KEY: genome_desc, CFG_CHECKSUM_KEY: genome_checksum} - rgc_server.update_genomes(genome, genome_attrs) + with rgc_server as r: + r.update_genomes(genome, genome_attrs) + # need to remove 'assets' key before writing to file since an empty PathExtAttmap's string + # repr is 'OrderedDict()' string which leads to an error later on, when the setdefault() + # method is used in update_assets() method. The removed key is added in update_assets() below. 
+ del r[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY] _LOGGER.debug("Updating '{}' genome attributes...".format(genome)) asset = asset_list[counter] if asset_list is not None else None assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() From 5928738fcafaab36f2720df1a5270da5d76cc1c4 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 30 Jan 2020 15:40:24 -0500 Subject: [PATCH 05/12] add fastAPI app info --- refgenieserver/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 10b2859..f594c55 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -10,7 +10,12 @@ import uvicorn from ubiquerg import parse_registry_path -app = FastAPI() +app = FastAPI( + title=PKG_NAME, + description="a web interface and RESTful API for reference genome assets", + version=server_v +) + app.mount("/" + STATIC_DIRNAME, StaticFiles(directory=STATIC_PATH), name=STATIC_DIRNAME) templates = Jinja2Templates(directory=TEMPLATES_PATH) From 0c20e98df5c545825f7bba1dab5c73bd7f014212 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 08:24:31 -0400 Subject: [PATCH 06/12] RefGenConf adjust method names --- refgenieserver/routers/version2.py | 2 +- refgenieserver/server_builder.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/refgenieserver/routers/version2.py b/refgenieserver/routers/version2.py index 8dda7cf..80faae9 100644 --- a/refgenieserver/routers/version2.py +++ b/refgenieserver/routers/version2.py @@ -54,7 +54,7 @@ async def list_available_assets(): """ Returns a list of all assets that can be downloaded. No inputs required. 
""" - ret_dict = rgc.assets_dict(include_tags=True) + ret_dict = rgc.list(include_tags=True) _LOGGER.info("serving assets dict: {}".format(ret_dict)) return ret_dict diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 128d00d..0145505 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -131,7 +131,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): if not rgc.is_asset_complete(genome, asset_name, tag_name): _LOGGER.info("'{}/{}:{}' is incomplete, skipping".format(genome, asset_name, tag_name)) with rgc_server as r: - r.remove_assets(genome, asset_name, tag_name) + r.cfg_remove_assets(genome, asset_name, tag_name) continue file_name = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][CFG_ASSET_TAGS_KEY][tag_name][ CFG_ASSET_PATH_KEY] @@ -271,9 +271,9 @@ def _check_servable(rgc, genome, asset, tag): try: for tag_name, tag in asset[CFG_ASSET_TAGS_KEY].items(): if not _check_servable(rgc, genome_name, asset_name, tag_name): - rgc.remove_assets(genome_name, asset_name, tag_name) + rgc.cfg_remove_assets(genome_name, asset_name, tag_name) except KeyError: - rgc.remove_assets(genome_name, asset_name) + rgc.cfg_remove_assets(genome_name, asset_name) return rgc @@ -291,9 +291,9 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) genome, asset, tag = registry_path["namespace"], registry_path["item"], registry_path["tag"] try: if asset is None: - [rgc.remove_assets(genome, x, None) for x in rgc.list_assets_by_genome(genome)] + [rgc.cfg_remove_assets(genome, x, None) for x in rgc.list_assets_by_genome(genome)] else: - rgc.remove_assets(genome, asset, tag) + rgc.cfg_remove_assets(genome, asset, tag) _LOGGER.info("{}/{}{} removed".format(genome, asset, ":" + tag if tag else "")) except KeyError: _LOGGER.warning("{}/{}{} not found and not removed.".format(genome, asset, ":" + tag if tag else "")) From 3c8fd9ab5e02d23563a7e23935ca140a73ce66ac Mon 
Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 08:25:23 -0400 Subject: [PATCH 07/12] update refgenconf requirement --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index ce18ba3..399447d 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,5 @@ fastapi jinja2 logmuse>=0.2 uvicorn>=0.7.1 -refgenconf>=0.6.2 +refgenconf>=0.7.0-dev ubiquerg>=0.5.0 From b6d1706a560879b6447df66560f2df6dfc98f56c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 10:03:32 -0400 Subject: [PATCH 08/12] do not allow archiving of incomplete assets --- refgenieserver/server_builder.py | 33 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 0145505..1dd1dac 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -4,7 +4,8 @@ from glob import glob from subprocess import run from refgenconf import RefGenConf -from refgenconf.exceptions import GenomeConfigFormatError, ConfigNotCompliantError, RefgenconfError +from refgenconf.exceptions import RefgenconfError, ConfigNotCompliantError, \ + GenomeConfigFormatError, MissingConfigDataError from ubiquerg import checksum, size, is_command_callable, parse_registry_path from .const import * @@ -98,15 +99,11 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): os.makedirs(target_dir) genome_desc = rgc[CFG_GENOMES_KEY][genome].setdefault(CFG_GENOME_DESC_KEY, DESC_PLACEHOLDER) \ if genomes_desc is None or genome not in descs else descs[genome] - genome_checksum = rgc[CFG_GENOMES_KEY][genome].setdefault(CFG_CHECKSUM_KEY, CHECKSUM_PLACEHOLDER) + genome_checksum = rgc[CFG_GENOMES_KEY][genome].\ + setdefault(CFG_CHECKSUM_KEY, CHECKSUM_PLACEHOLDER) genome_attrs = {CFG_GENOME_DESC_KEY: 
genome_desc, CFG_CHECKSUM_KEY: genome_checksum} - with rgc_server as r: - r.update_genomes(genome, genome_attrs) - # need to remove 'assets' key before writing to file since an empty PathExtAttmap's string - # repr is 'OrderedDict()' string which leads to an error later on, when the setdefault() - # method is used in update_assets() method. The removed key is added in update_assets() below. - del r[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY] + rgc_server.update_genomes(genome, genome_attrs) _LOGGER.debug("Updating '{}' genome attributes...".format(genome)) asset = asset_list[counter] if asset_list is not None else None assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() @@ -116,23 +113,26 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): else: _LOGGER.debug("Assets to be processed: {}".format(str(assets))) for asset_name in assets if isinstance(assets, list) else [assets]: - asset_desc = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name].setdefault(CFG_ASSET_DESC_KEY, - DESC_PLACEHOLDER) - default_tag = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name].setdefault(CFG_ASSET_DEFAULT_TAG_KEY, - DEFAULT_TAG) + asset_desc = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name]\ + .setdefault(CFG_ASSET_DESC_KEY, DESC_PLACEHOLDER) + default_tag = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name]\ + .setdefault(CFG_ASSET_DEFAULT_TAG_KEY, DEFAULT_TAG) asset_attrs = {CFG_ASSET_DESC_KEY: asset_desc, CFG_ASSET_DEFAULT_TAG_KEY: default_tag} _LOGGER.debug("Updating '{}/{}' asset attributes...".format(genome, asset_name)) with rgc_server as r: r.update_assets(genome, asset_name, asset_attrs) + tag = tag_list[counter] if tag_list is not None else None tags = tag or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][CFG_ASSET_TAGS_KEY].keys() for tag_name in tags if isinstance(tags, list) else [tags]: if not rgc.is_asset_complete(genome, asset_name, tag_name): - _LOGGER.info("'{}/{}:{}' is incomplete, 
skipping".format(genome, asset_name, tag_name)) - with rgc_server as r: - r.cfg_remove_assets(genome, asset_name, tag_name) - continue + raise MissingConfigDataError( + "Asset '{}/{}:{}' is incomplete. This probably means an" + " attempt to archive a partially pulled parent. " + "refgenieserver archive requires all assets to be built" + " prior to archiving.".format(genome, asset_name, tag_name) + ) file_name = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][CFG_ASSET_TAGS_KEY][tag_name][ CFG_ASSET_PATH_KEY] target_file = os.path.join(target_dir, "{}__{}".format(asset_name, tag_name) + ".tgz") @@ -271,6 +271,7 @@ def _check_servable(rgc, genome, asset, tag): try: for tag_name, tag in asset[CFG_ASSET_TAGS_KEY].items(): if not _check_servable(rgc, genome_name, asset_name, tag_name): + _LOGGER.debug("Removing '{}/{}:{}', it's not servable".format(genome_name, asset_name, tag_name)) rgc.cfg_remove_assets(genome_name, asset_name, tag_name) except KeyError: rgc.cfg_remove_assets(genome_name, asset_name) From f4c7d41d85bcdb3aa9b749f6d60b20475658b19c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 11:17:38 -0400 Subject: [PATCH 09/12] more comprehensive smoketesting --- test_refgenie.sh | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/test_refgenie.sh b/test_refgenie.sh index 68137aa..3a86a5e 100755 --- a/test_refgenie.sh +++ b/test_refgenie.sh @@ -57,7 +57,8 @@ FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY .
/app - +# RUN pip install https://github.com/databio/refgenconf/archive/dev.zip +# RUN pip install https://github.com/databio/refgenieserver/archive/dev.zip RUN pip install https://github.com/databio/refgenieserver/archive/master.zip EOF @@ -112,6 +113,21 @@ refgenie pull -c $REFGENIE test/fasta || ErrorExit "$LINENO: Failed to pull remo echo -e "\n-- List local assets --\n" refgenie list -c $REFGENIE || ErrorExit "$LINENO: Failed to list local assets." +echo -e "\n-- Tag asset --\n" +refgenie tag -c $REFGENIE test/fasta:default --tag test || ErrorExit "$LINENO: Failed to tag asset." + +echo -e "\n-- Remove asset --\n" +refgenie remove -c $REFGENIE test/fasta --force || ErrorExit "$LINENO: Failed to remove asset." + +echo -e "\n-- Get asset digest --\n" +refgenie id -c $REFGENIE rCRS/fasta || ErrorExit "$LINENO: Failed to get asset digest." + +echo -e "\n-- Subscribe --\n" +refgenie subscribe -c $REFGENIE -s http://faulty.com || ErrorExit "$LINENO: Failed to subscribe." + +echo -e "\n-- Unsubscribe --\n" +refgenie unsubscribe -c $REFGENIE -s http://faulty.com || ErrorExit "$LINENO: Failed to unsubscribe." + echo -e "\n-- Shut down local servers --\n" docker stop refgenieservercon || ErrorExit "$LINENO: Failed to stop remote rCRS server." docker stop refgenieservercon2 || ErrorExit "$LINENO: Failed to stop remote test server."
From e80c52b02836ff42e5374f6f7d45fbe3f57bbe3f Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 11:36:37 -0400 Subject: [PATCH 10/12] update requirement --- requirements/requirements-all.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 399447d..33355a2 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,5 @@ fastapi jinja2 logmuse>=0.2 uvicorn>=0.7.1 -refgenconf>=0.7.0-dev +refgenconf>=0.7.0 ubiquerg>=0.5.0 From 239fd03e23db2f374fb433b5976553f86890943f Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 13:21:29 -0400 Subject: [PATCH 11/12] prep release --- changelog.md | 5 +++-- refgenieserver/_version.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/changelog.md b/changelog.md index 8f81386..5afd0e4 100644 --- a/changelog.md +++ b/changelog.md @@ -2,8 +2,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. 
-## [0.4.4] -- unreleased - +## [0.4.4] -- 2020-03-20 +### Changed +- `refgenieserver archive` requires all assets to be complete prior to archiving ## [0.4.3] -- 2020-01-16 ### Added diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index 9320c61..cd1ee63 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1 @@ -__version__ = "0.4.4-dev" +__version__ = "0.4.4" From 6ac9ed684ad409e5cf37cdb5806b028ea05591ab Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 17 Mar 2020 17:11:32 -0400 Subject: [PATCH 12/12] release date --- changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 5afd0e4..a6f7fc1 100644 --- a/changelog.md +++ b/changelog.md @@ -2,7 +2,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.4.4] -- 2020-03-20 +## [0.4.4] -- 2020-03-17 ### Changed - `refgenieserver archive` requires all assets to be complete prior to archiving