From 98e9c664d2a668d8b08453a4f1c93dd6af0d8660 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 18 Mar 2021 12:36:20 -0400 Subject: [PATCH 01/44] init dev branch --- changelog.md | 2 ++ refgenieserver/_version.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/changelog.md b/changelog.md index 58615fa..3d7b5f8 100644 --- a/changelog.md +++ b/changelog.md @@ -2,6 +2,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. +## [0.7.0] -- unreleased + ## [0.6.0] -- 2021-03-11 ### Added - API v3, which is a complete redesign and extension of the previous version diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index 906d362..5a93ff5 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1 @@ -__version__ = "0.6.0" +__version__ = "0.7.0-dev" From 5622d59311f6da81dbcbaa191cbfc802966c32cb Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 18 Mar 2021 13:31:47 -0400 Subject: [PATCH 02/44] copy unarchived asset dir --- refgenieserver/server_builder.py | 134 +++++++++++++++---------------- 1 file changed, 65 insertions(+), 69 deletions(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index d7b1ada..408296e 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -37,10 +37,8 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): """ if float(rgc[CFG_VERSION_KEY]) < float(REQ_CFG_VERSION): raise ConfigNotCompliantError( - "You need to update the genome config to v{} in order to use the archiver. " - "The required version can be generated with refgenie >= {}".format( - REQ_CFG_VERSION, REFGENIE_BY_CFG[REQ_CFG_VERSION] - ) + f"You need to update the genome config to v{REQ_CFG_VERSION} in order to use the archiver. " + f"The required version can be generated with refgenie >= {REFGENIE_BY_CFG[REQ_CFG_VERSION]}" ) if CFG_ARCHIVE_CONFIG_KEY in rgc: srp = rgc[CFG_ARCHIVE_CONFIG_KEY] @@ -56,9 +54,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ) except KeyError: raise GenomeConfigFormatError( - "The config '{}' is missing a {} entry. Can't determine the desired archive.".format( - cfg_path, " or ".join([CFG_ARCHIVE_KEY, CFG_ARCHIVE_KEY_OLD]) - ) + f"The config '{cfg_path}' is missing a {' or '.join([CFG_ARCHIVE_KEY, CFG_ARCHIVE_KEY_OLD])} entry. Can't determine the desired archive." ) if os.path.isfile(server_rgc_path) and not os.access(server_rgc_path, os.W_OK): raise OSError( @@ -72,7 +68,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): # original RefGenConf has been created in read-only mode, # make it RW compatible and point to new target path for server use or initialize a new object if os.path.exists(server_rgc_path): - _LOGGER.debug("'{}' file was found and will be updated".format(server_rgc_path)) + _LOGGER.debug(f"'{server_rgc_path}' file was found and will be updated") rgc_server = RefGenConf(filepath=server_rgc_path) if remove: if not registry_paths: @@ -89,9 +85,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): "You can't remove archives since the genome_archive path does not exist yet." ) exit(1) - _LOGGER.debug( - "'{}' file was not found and will be created".format(server_rgc_path) - ) + _LOGGER.debug(f"'{server_rgc_path}' file was not found and will be created") rgc_server = RefGenConf(filepath=rgc.file_path) rgc_server.make_writable(filepath=server_rgc_path) rgc_server.make_readonly() @@ -106,21 +100,19 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): _LOGGER.error("No genomes found") exit(1) else: - _LOGGER.debug("Genomes to be processed: {}".format(str(genomes))) + _LOGGER.debug(f"Genomes to be processed: {str(genomes)}") genomes = [rgc.get_genome_alias_digest(g) for g in genomes] if genomes_desc is not None: if os.path.exists(genomes_desc): import csv - _LOGGER.info( - "Found a genomes descriptions CSV file: {}".format(genomes_desc) - ) + _LOGGER.info(f"Found a genomes descriptions CSV file: {genomes_desc}") with open(genomes_desc, mode="r") as infile: reader = csv.reader(infile) descs = {rows[0]: rows[1] for rows in reader} else: _LOGGER.error( - "Genomes descriptions CSV file does not exist: {}".format(genomes_desc) + f"Genomes descriptions CSV file does not exist: {genomes_desc}" ) sys.exit(1) counter = 0 @@ -146,14 +138,14 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): with rgc_server as r: r[CFG_GENOMES_KEY].setdefault(genome, PXAM()) r[CFG_GENOMES_KEY][genome].update(genome_attrs) - _LOGGER.debug("Updating '{}' genome attributes...".format(genome)) + _LOGGER.debug(f"Updating '{genome}' genome attributes...") asset = asset_list[counter] if asset_list is not None else None assets = asset or rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY].keys() if not assets: _LOGGER.error("No assets found") continue else: - _LOGGER.debug("Assets to be processed: {}".format(str(assets))) + _LOGGER.debug(f"Assets to be processed: {str(assets)}") for asset_name in assets if isinstance(assets, list) else [assets]: asset_desc = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][ asset_name @@ -165,9 +157,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): CFG_ASSET_DESC_KEY: asset_desc, CFG_ASSET_DEFAULT_TAG_KEY: default_tag, } - _LOGGER.debug( - "Updating '{}/{}' asset attributes...".format(genome, asset_name) - ) + _LOGGER.debug(f"Updating '{genome}/{asset_name}' asset attributes...") with rgc_server as r: r.update_assets(genome, asset_name, asset_attrs) @@ -181,17 +171,16 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): for tag_name in tags if isinstance(tags, list) else [tags]: if not rgc.is_asset_complete(genome, asset_name, tag_name): raise MissingConfigDataError( - "Asset '{}/{}:{}' is incomplete. This probably means an" - " attempt to archive a partially pulled parent. " - "refgenieserver archive requires all assets to be built" - " prior to archiving.".format(genome, asset_name, tag_name) + f"Asset '{genome}/{asset_name}:{tag_name}' is incomplete. This probably means an" + f" attempt to archive a partially pulled parent. " + f"refgenieserver archive requires all assets to be built" + f" prior to archiving." ) file_name = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ CFG_ASSET_TAGS_KEY ][tag_name][CFG_ASSET_PATH_KEY] - target_file = os.path.join( - target_dir, "{}__{}".format(asset_name, tag_name) + ".tgz" - ) + target_file_core = os.path.join(target_dir, f"{asset_name}__{tag_name}") + target_file = f"{target_file_core}.tgz" input_file = os.path.join(genome_dir, file_name, tag_name) # these attributes have to be read from the original RefGenConf in case the archiver just increments # an existing server RefGenConf @@ -209,11 +198,10 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ][tag_name].setdefault(CFG_ASSET_CHECKSUM_KEY, None) if not os.path.exists(target_file) or force: _LOGGER.info( - "Creating archive '{}' from '{}' asset".format( - target_file, input_file - ) + f"Creating archive '{target_file}' from '{input_file}' asset" ) try: + _copy_asset_dir(input_file, target_file_core) _check_tgz(input_file, target_file) _copy_recipe(input_file, target_dir, asset_name, tag_name) _copy_log(input_file, target_dir, asset_name, tag_name) @@ -232,9 +220,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): continue else: _LOGGER.info( - "Updating '{}/{}:{}' tag attributes...".format( - genome, asset_name, tag_name - ) + f"Updating '{genome}/{asset_name}:{tag_name}' tag attributes..." ) tag_attrs = { CFG_ASSET_PATH_KEY: file_name, @@ -260,15 +246,14 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): tag_attrs.update( {CFG_LEGACY_ARCHIVE_CHECKSUM_KEY: legacy_digest} ) - _LOGGER.debug("attr dict: {}".format(tag_attrs)) + _LOGGER.debug(f"attr dict: {tag_attrs}") with rgc_server as r: for parent in parents: - # here we update any pre-existing parents' children attr with the newly added asset + # here we update any pre-existing parents' children + # attr with the newly added asset _LOGGER.debug( - "Updating {} children list with {}".format( - parent, - "{}/{}:{}".format(genome, asset_name, tag_name), - ) + f"Updating {parent} children list with " + f"{genome}/{asset_name}:{tag_name}" ) rp = parse_registry_path(parent) parent_genome = rp["namespace"] @@ -283,22 +268,20 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ) except RefgenconfError: _LOGGER.warning( - "'{}/{}:{}'s parent '{}' does not exist, " - "skipping relationship updates".format( - genome, asset_name, tag_name, parent - ) + f"'{genome}/{asset_name}:{tag_name}'s parent " + f"'{parent}' does not exist, skipping relationship updates" ) continue r.update_relatives_assets( parent_genome, parent_asset, parent_tag, - ["{}/{}:{}".format(genome, asset_name, tag_name)], + [f"{genome}/{asset_name}:{tag_name}"], children=True, ) r.update_tags(genome, asset_name, tag_name, tag_attrs) else: - exists_msg = "'{}' exists.".format(target_file) + exists_msg = f"'{target_file}' exists." try: rgc_server[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ CFG_ASSET_TAGS_KEY @@ -311,9 +294,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): r.update_tags(genome, asset_name, tag_name, tag_attrs) counter += 1 - _LOGGER.info( - "Builder finished; server config file saved to {}".format(rgc_server.file_path) - ) + _LOGGER.info(f"Builder finished; server config file saved: {rgc_server.file_path}") def _check_tgz(path, output): @@ -335,10 +316,10 @@ def _check_tgz(path, output): else "-cvzf {o} {tn}" ) command = cmd.format(p=pth, o=output, tn=tag_name) - _LOGGER.info("command: {}".format(command)) + _LOGGER.info(f"command: {command}") run(command, shell=True) else: - raise OSError("Entity '{}' does not exist".format(path)) + raise OSError(f"Entity '{path}' does not exist") def _check_tgz_legacy(path, output, asset_name, genome_name, alias): @@ -374,10 +355,10 @@ def _check_tgz_legacy(path, output, asset_name, genome_name, alias): # remove the new dir cmd += "rm -r {p}/{an}" command = cmd.format(p=path, oa=aliased_output, an=asset_name) - _LOGGER.debug("command: {}".format(command)) + _LOGGER.debug(f"command: {command}") run(command, shell=True) else: - raise OSError("Entity '{}' does not exist".format(path)) + raise OSError(f"Entity '{path}' does not exist") def _copy_log(input_dir, target_dir, asset_name, tag_name): @@ -387,7 +368,7 @@ def _copy_log(input_dir, target_dir, asset_name, tag_name): :param str input_dir: path to the directory to copy the recipe from :param str target_dir: path to the directory to copy the recipe to """ - log_path = "{}/{}/{}".format(input_dir, BUILD_STATS_DIR, ORI_LOG_NAME) + log_path = f"{input_dir}/{BUILD_STATS_DIR}/{ORI_LOG_NAME}" if log_path and os.path.exists(log_path): run( "cp " @@ -396,9 +377,26 @@ def _copy_log(input_dir, target_dir, asset_name, tag_name): + os.path.join(target_dir, TEMPLATE_LOG.format(asset_name, tag_name)), shell=True, ) - _LOGGER.debug("Log copied to: {}".format(target_dir)) + _LOGGER.debug(f"Log copied to: {target_dir}") else: - _LOGGER.warning("Log not found: {}".format(log_path)) + _LOGGER.warning(f"Log not found: {log_path}") + + +def _copy_asset_dir(input_dir, target_dir): + """ + Copy the asset directory + + :param str input_dir: path to the directory to copy the asset dir from + :param str target_dir: path to the directory to copy the asset dir to + """ + if input_dir and os.path.exists(input_dir): + run( + f"rsync -rvL --exclude '_refgenie_build' {input_dir}/ {target_dir}/", + shell=True, + ) + _LOGGER.info(f"Asset directory copied to: {target_dir}") + else: + _LOGGER.warning(f"Asset directory not found: {input_dir}") def _copy_recipe(input_dir, target_dir, asset_name, tag_name): @@ -410,14 +408,13 @@ def _copy_recipe(input_dir, target_dir, asset_name, tag_name): :param str asset_name: asset name :param str tag_name: tag name """ - recipe_path = "{}/{}/{}".format( - input_dir, BUILD_STATS_DIR, TEMPLATE_RECIPE_JSON.format(asset_name, tag_name) - ) + recipe_path = \ + f"{input_dir}/{BUILD_STATS_DIR}/{TEMPLATE_RECIPE_JSON.format(asset_name, tag_name)}" if recipe_path and os.path.exists(recipe_path): run("cp " + recipe_path + " " + target_dir, shell=True) - _LOGGER.debug("Recipe copied to: {}".format(target_dir)) + _LOGGER.debug(f"Recipe copied to: {target_dir}") else: - _LOGGER.warning("Recipe not found: {}".format(recipe_path)) + _LOGGER.warning(f"Recipe not found: {recipe_path}") def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY): @@ -426,7 +423,8 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) :param refgenconf.RefGenConf rgc: object to remove the entries from :param list[dict] registry_paths: entries to remove - :param str cfg_archive_folder_key: configuration archive folder key in the genome configuration file + :param str cfg_archive_folder_key: configuration archive folder key in the genome + configuration file :return list[str]: removed file paths """ ret = [] @@ -445,20 +443,18 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) else: rgc.cfg_remove_assets(genome, asset, tag) _LOGGER.info( - "{}/{}{} removed".format(genome, asset, ":" + tag if tag else "") + f"{genome}/{asset}{':' + tag if tag else ''} removed" ) except KeyError: _LOGGER.warning( - "{}/{}{} not found and not removed.".format( - genome, asset, ":" + tag if tag else "" - ) + f"{genome}/{asset}{':' + tag if tag else ''} not found and not removed" ) continue ret.append( os.path.join( rgc[cfg_archive_folder_key], genome, - "{}__{}".format(asset or "*", tag or "*") + ".tgz", + f"{asset or '*'}__{tag or '*'}.tgz", ) ) for p in ret: @@ -467,10 +463,10 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) try: os.remove(path) except FileNotFoundError: - _LOGGER.warning("File does not exist: {}".format(path)) + _LOGGER.warning(f"File does not exist: {path}") try: os.removedirs(os.path.join(rgc[cfg_archive_folder_key], genome)) - _LOGGER.info("Removed empty genome directory: {}".format(genome)) + _LOGGER.info(f"Removed empty genome directory: {genome}") except OSError: pass return ret From 9c2eb4573c32b21f0f18c6029ea7cde399336221 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 18 Mar 2021 14:26:43 -0400 Subject: [PATCH 03/44] start working on #100 --- refgenieserver/const.py | 2 +- refgenieserver/helpers.py | 1 + refgenieserver/routers/private.py | 1 - refgenieserver/routers/version3.py | 45 ++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/refgenieserver/const.py b/refgenieserver/const.py index 4a1dd50..04c3cbd 100644 --- a/refgenieserver/const.py +++ b/refgenieserver/const.py @@ -13,7 +13,7 @@ PKG_NAME = "refgenieserver" DEFAULT_PORT = 80 BASE_DIR = "/genomes" -# if running outside of the Docker container 'BASE_DIR' can be replaced with rgc[CFG_ARCHIVE_KEY] in 'main.py' +# if running outside of the Docker container 'BASE_DIR' can be replaced with rgc[CFG_ARCHIVE_KEY] TEMPLATES_DIRNAME = "templates" TEMPLATES_PATH = os.path.join( os.path.dirname(os.path.abspath(__file__)), TEMPLATES_DIRNAME diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 4bbe7ee..83ff83c 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -154,6 +154,7 @@ def get_datapath_for_genome(rgc, fill_dict, pth_templ="{base}/{genome}/{file_nam ), "Only the these keys are allowed in the fill_dict: {}".format(req_keys) remote = False fill_dict.update({"base": BASE_DIR}) + # fill_dict.update({"base": rgc["genome_archive_folder"]}) if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None: fill_dict["base"] = rgc[CFG_REMOTE_URL_BASE_KEY].rstrip("/") remote = True diff --git a/refgenieserver/routers/private.py b/refgenieserver/routers/private.py index 31990d6..3ed511b 100644 --- a/refgenieserver/routers/private.py +++ b/refgenieserver/routers/private.py @@ -10,7 +10,6 @@ "/genomes/dict", operation_id=PRIVATE_API + API_ID_GENOMES_DICT, response_model=Dict[str, Genome], - include_in_schema=False, ) async def get_genomes_dict(): """ diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index c31377d..60e7482 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -50,6 +50,12 @@ regex=r"^\S+$", example=ex_asset, ) +s = Path( + ..., + description="Seek key name", + regex=r"^\S+$", + example=ex_asset, +) t = Path( ..., description="Tag name", @@ -220,6 +226,45 @@ async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = t raise HTTPException(status_code=404, detail=msg) +@router.get( + "/assets/asset_file/{genome}/{asset}/{seek_key}", + operation_id=API_VERSION + "customAssetFile", + tags=api_version_tags, +) +async def download_asset_file( + genome: str = g, asset: str = a, seek_key: str = s, tag: Optional[str] = tq +): + """ + Returns an archive. Requires the genome name and the asset name as an input. + + Optionally, 'tag' query parameter can be specified to get a tagged asset archive. + Default tag is returned otherwise. + """ + tag = tag or rgc.get_default_tag( + genome, asset + ) # returns 'default' for nonexistent genome/asset; no need to catch + seek_key_target = rgc.genomes[genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ + tag + ][CFG_SEEK_KEYS_KEY][seek_key] + file_name = f"{asset}__{tag}/{seek_key_target}" + path, remote = get_datapath_for_genome( + rgc, dict(genome=genome, file_name=file_name) + ) + _LOGGER.info(f"file source: {path}") + if remote: + _LOGGER.info(f"redirecting to URL: '{path}'") + return RedirectResponse(path) + _LOGGER.info(f"serving asset file: '{path}'") + if os.path.isfile(path): + return FileResponse( + path, filename=file_name, media_type="application/octet-stream" + ) + else: + msg = MSG_404.format(f"asset ({asset})") + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) + + @router.get( "/assets/default_tag/{genome}/{asset}", operation_id=API_VERSION + API_ID_DEFAULT_TAG, From 6699ea536aa89b589785fcf04a9f635d6af9b80c Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 18 Mar 2021 17:19:41 -0400 Subject: [PATCH 04/44] add asset_file and asset_file_path serving endpoints --- dir2cfg_template.py | 12 ++---- refgenieserver/__init__.py | 4 +- refgenieserver/__main__.py | 1 + refgenieserver/const.py | 6 ++- refgenieserver/data_models.py | 3 +- refgenieserver/helpers.py | 42 ++++++++++++++++-- refgenieserver/main.py | 18 ++++---- refgenieserver/routers/private.py | 7 +-- refgenieserver/routers/version1.py | 12 +++--- refgenieserver/routers/version2.py | 14 +++--- refgenieserver/routers/version3.py | 68 +++++++++++++++++------------- refgenieserver/server_builder.py | 25 ++++------- setup.py | 3 +- 13 files changed, 127 insertions(+), 88 deletions(-) diff --git a/dir2cfg_template.py b/dir2cfg_template.py index eefec9d..ab0746f 100755 --- a/dir2cfg_template.py +++ b/dir2cfg_template.py @@ -1,16 +1,12 @@ V = 0.1 SERVER_CFG_NAME = "refgenieserver_config.yaml" +import argparse import os import sys + import yaml -import argparse -from refgenconf import ( - CFG_GENOMES_KEY, - CFG_SERVER_KEY, - CFG_FOLDER_KEY, - CFG_ARCHIVE_KEY, - CFG_ASSET_PATH_KEY, -) +from refgenconf import (CFG_ARCHIVE_KEY, CFG_ASSET_PATH_KEY, CFG_FOLDER_KEY, + CFG_GENOMES_KEY, CFG_SERVER_KEY) class _VersionInHelpParser(argparse.ArgumentParser): diff --git a/refgenieserver/__init__.py b/refgenieserver/__init__.py index 593f8e1..b898684 100644 --- a/refgenieserver/__init__.py +++ b/refgenieserver/__init__.py @@ -1,4 +1,4 @@ -from .helpers import * -from .server_builder import * from .const import * +from .helpers import * from .main import * +from .server_builder import * diff --git a/refgenieserver/__main__.py b/refgenieserver/__main__.py index 2e4396a..3ea99ba 100644 --- a/refgenieserver/__main__.py +++ b/refgenieserver/__main__.py @@ -1,4 +1,5 @@ import sys + from .main import main if __name__ == "__main__": diff --git a/refgenieserver/const.py b/refgenieserver/const.py index 04c3cbd..c7f6857 100644 --- a/refgenieserver/const.py +++ b/refgenieserver/const.py @@ -1,9 +1,11 @@ """ Package constants """ import os -from refgenconf.const import * +from platform import python_version + from refgenconf._version import __version__ as rgc_v +from refgenconf.const import * + from ._version import __version__ as server_v -from platform import python_version ALL_VERSIONS = { "server_version": server_v, diff --git a/refgenieserver/data_models.py b/refgenieserver/data_models.py index d92e2dd..8b2243b 100644 --- a/refgenieserver/data_models.py +++ b/refgenieserver/data_models.py @@ -1,5 +1,6 @@ +from typing import Dict, List + from pydantic import BaseModel -from typing import List, Dict class Tag(BaseModel): diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 83ff83c..8cf0182 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -1,10 +1,13 @@ import logging -from .const import * -from ._version import __version__ as v -from yacman import get_first_env_var +from string import Formatter + +from fastapi import HTTPException +from refgenconf.exceptions import RefgenconfError from ubiquerg import VersionInHelpParser +from yacman import get_first_env_var -from string import Formatter +from ._version import __version__ as v +from .const import * global _LOGGER _LOGGER = logging.getLogger(PKG_NAME) @@ -203,3 +206,34 @@ def safely_get_example(rgc, entity, rgc_method, default, **kwargs): f"Failed to create {entity} example! Using '{default}', which might not exist" ) return default + + +def create_asset_file_path(rgc, genome, asset, tag, seek_key): + """ + Construct a path to an unarchived asset file + + :param str genome: + :param str asset: + :param str tag: + """ + tag = tag or rgc.get_default_tag( + genome, asset + ) # returns 'default' for nonexistent genome/asset; no need to catch + try: + rgc._assert_gat_exists(gname=genome, aname=asset, tname=tag) + except RefgenconfError: + msg = MSG_404.format(f"asset ({genome}/{asset}:{tag})") + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) + tag_dict = rgc.genomes[genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][tag] + if seek_key not in tag_dict[CFG_SEEK_KEYS_KEY]: + msg = MSG_404.format(f"seek_key ({genome}/{asset}.{seek_key}:{tag})") + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) + seek_key_target = tag_dict[CFG_SEEK_KEYS_KEY][seek_key] + file_name = f"{asset}__{tag}/{seek_key_target}" + path, remote = get_datapath_for_genome( + rgc, dict(genome=genome, file_name=file_name) + ) + _LOGGER.info(f"serving asset file path: {path}") + return path diff --git a/refgenieserver/main.py b/refgenieserver/main.py index 6796dde..ab9608d 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -1,15 +1,17 @@ -from .const import * -from .helpers import build_parser, purge_nonservable -from .server_builder import archive -from refgenconf import RefGenConf, select_genome_config +import sys + +import logmuse +import uvicorn from fastapi import FastAPI +from refgenconf import RefGenConf, select_genome_config from starlette.staticfiles import StaticFiles from starlette.templating import Jinja2Templates -import logmuse -import sys -import uvicorn from ubiquerg import parse_registry_path +from .const import * +from .helpers import build_parser, purge_nonservable +from .server_builder import archive + app = FastAPI( title=PKG_NAME, description="a web interface and RESTful API for reference genome assets", @@ -54,7 +56,7 @@ def main(): # the router imports need to be after the RefGenConf object is declared with rgc as r: purge_nonservable(r) - from .routers import version1, version2, version3, private + from .routers import private, version1, version2, version3 app.include_router(version3.router) app.include_router(version1.router, prefix="/v1") diff --git a/refgenieserver/routers/private.py b/refgenieserver/routers/private.py index 3ed511b..66a8096 100644 --- a/refgenieserver/routers/private.py +++ b/refgenieserver/routers/private.py @@ -1,8 +1,9 @@ -from ..const import * -from ..main import rgc, templates, _LOGGER, app -from ..data_models import Genome, Dict from fastapi import APIRouter +from ..const import * +from ..data_models import Dict, Genome +from ..main import _LOGGER, app, rgc, templates + router = APIRouter() diff --git a/refgenieserver/routers/version1.py b/refgenieserver/routers/version1.py index 152ae67..dba78fc 100644 --- a/refgenieserver/routers/version1.py +++ b/refgenieserver/routers/version1.py @@ -1,14 +1,14 @@ -from starlette.responses import FileResponse, RedirectResponse -from starlette.requests import Request -from fastapi import HTTPException, APIRouter from copy import copy +from fastapi import APIRouter, HTTPException from refgenconf.helpers import replace_str_in_obj +from starlette.requests import Request +from starlette.responses import FileResponse, RedirectResponse from ..const import * -from ..helpers import preprocess_attrs -from ..main import rgc, templates, _LOGGER, app -from ..helpers import get_openapi_version, get_datapath_for_genome +from ..helpers import (get_datapath_for_genome, get_openapi_version, + preprocess_attrs) +from ..main import _LOGGER, app, rgc, templates router = APIRouter() diff --git a/refgenieserver/routers/version2.py b/refgenieserver/routers/version2.py index 2313c08..afcea4f 100644 --- a/refgenieserver/routers/version2.py +++ b/refgenieserver/routers/version2.py @@ -1,15 +1,15 @@ -from starlette.responses import FileResponse, JSONResponse, RedirectResponse -from starlette.requests import Request -from fastapi import HTTPException, APIRouter from copy import copy -from ubiquerg import parse_registry_path -from refgenconf.refgenconf import map_paths_by_id +from fastapi import APIRouter, HTTPException from refgenconf.helpers import replace_str_in_obj +from refgenconf.refgenconf import map_paths_by_id +from starlette.requests import Request +from starlette.responses import FileResponse, JSONResponse, RedirectResponse +from ubiquerg import parse_registry_path from ..const import * -from ..main import rgc, templates, _LOGGER, app -from ..helpers import get_openapi_version, get_datapath_for_genome +from ..helpers import get_datapath_for_genome, get_openapi_version +from ..main import _LOGGER, app, rgc, templates router = APIRouter() diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 60e7482..8bc76ab 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -1,18 +1,19 @@ -from starlette.responses import FileResponse, JSONResponse, RedirectResponse -from starlette.requests import Request -from fastapi import HTTPException, APIRouter, Path, Query -from typing import Optional from copy import copy +from typing import Optional - -from ubiquerg import parse_registry_path +from fastapi import APIRouter, HTTPException, Path, Query +from refgenconf.exceptions import RefgenconfError from refgenconf.refgenconf import map_paths_by_id -from yacman import UndefinedAliasError, IK +from starlette.requests import Request +from starlette.responses import FileResponse, JSONResponse, RedirectResponse +from ubiquerg import parse_registry_path +from yacman import IK, UndefinedAliasError from ..const import * -from ..main import rgc, templates, _LOGGER, app -from ..helpers import get_openapi_version, get_datapath_for_genome, safely_get_example -from ..data_models import Tag, Genome, Dict, List +from ..data_models import Dict, Genome, List, Tag +from ..helpers import (create_asset_file_path, get_datapath_for_genome, + get_openapi_version, safely_get_example) +from ..main import _LOGGER, app, rgc, templates ex_alias = safely_get_example( rgc, @@ -235,36 +236,43 @@ async def download_asset_file( genome: str = g, asset: str = a, seek_key: str = s, tag: Optional[str] = tq ): """ - Returns an archive. Requires the genome name and the asset name as an input. + Returns the unarchived asset file. + Requires a genome name, an asset name and a seek_key name as an input. - Optionally, 'tag' query parameter can be specified to get a tagged asset archive. + Optionally, 'tag' query parameter can be specified to get a tagged asset file. Default tag is returned otherwise. """ - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch - seek_key_target = rgc.genomes[genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ - tag - ][CFG_SEEK_KEYS_KEY][seek_key] - file_name = f"{asset}__{tag}/{seek_key_target}" - path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name) - ) - _LOGGER.info(f"file source: {path}") - if remote: - _LOGGER.info(f"redirecting to URL: '{path}'") - return RedirectResponse(path) - _LOGGER.info(f"serving asset file: '{path}'") + path = create_asset_file_path(rgc, genome, asset, tag, seek_key) + remote = False + if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None: + remote = True if os.path.isfile(path): - return FileResponse( - path, filename=file_name, media_type="application/octet-stream" - ) + return FileResponse(path, media_type="application/octet-stream") else: msg = MSG_404.format(f"asset ({asset})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) +@router.get( + "/assets/asset_file_path/{genome}/{asset}/{seek_key}", + operation_id=API_VERSION + "customAssetFilePath", + tags=api_version_tags, + response_model=str, +) +async def get_asset_file_path( + genome: str = g, asset: str = a, seek_key: str = s, tag: Optional[str] = tq +): + """ + Returns a path to the unarchived asset file. + Requires a genome name, an asset name and a seek_key name as an input. + + Optionally, 'tag' query parameter can be specified to get a tagged asset file path. + Default tag is returned otherwise. + """ + return create_asset_file_path(rgc, genome, asset, tag, seek_key) + + @router.get( "/assets/default_tag/{genome}/{asset}", operation_id=API_VERSION + API_ID_DEFAULT_TAG, diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 408296e..3862b34 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -1,19 +1,15 @@ -import sys import logging - +import sys from glob import glob from subprocess import run -from refgenconf import RefGenConf -from refgenconf.exceptions import ( - RefgenconfError, - ConfigNotCompliantError, - GenomeConfigFormatError, - MissingConfigDataError, -) -from refgenconf.helpers import swap_names_in_tree, replace_str_in_obj from attmap import PathExAttMap as PXAM -from ubiquerg import checksum, size, is_command_callable, parse_registry_path +from refgenconf import RefGenConf +from refgenconf.exceptions import (ConfigNotCompliantError, + GenomeConfigFormatError, + MissingConfigDataError, RefgenconfError) +from refgenconf.helpers import replace_str_in_obj, swap_names_in_tree +from ubiquerg import checksum, is_command_callable, parse_registry_path, size from .const import * @@ -408,8 +404,7 @@ def _copy_recipe(input_dir, target_dir, asset_name, tag_name): :param str asset_name: asset name :param str tag_name: tag name """ - recipe_path = \ - f"{input_dir}/{BUILD_STATS_DIR}/{TEMPLATE_RECIPE_JSON.format(asset_name, tag_name)}" + recipe_path = f"{input_dir}/{BUILD_STATS_DIR}/{TEMPLATE_RECIPE_JSON.format(asset_name, tag_name)}" if recipe_path and os.path.exists(recipe_path): run("cp " + recipe_path + " " + target_dir, shell=True) _LOGGER.debug(f"Recipe copied to: {target_dir}") @@ -442,9 +437,7 @@ def _remove_archive(rgc, registry_paths, cfg_archive_folder_key=CFG_ARCHIVE_KEY) ] else: rgc.cfg_remove_assets(genome, asset, tag) - _LOGGER.info( - f"{genome}/{asset}{':' + tag if tag else ''} removed" - ) + _LOGGER.info(f"{genome}/{asset}{':' + tag if tag else ''} removed") except KeyError: _LOGGER.warning( f"{genome}/{asset}{':' + tag if tag else ''} not found and not removed" diff --git a/setup.py b/setup.py index 1e06c7e..c618823 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,9 @@ #! /usr/bin/env python -from setuptools import setup import sys +from setuptools import setup + PACKAGE = "refgenieserver" # Additional keyword arguments for setup(). From 123fc98dfd165abd980f27c7c35392638d8642a7 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 18 Mar 2021 17:27:30 -0400 Subject: [PATCH 05/44] handle not a local file case in asset file serving --- dir2cfg_template.py | 9 +++++++-- refgenieserver/routers/version1.py | 3 +-- refgenieserver/routers/version3.py | 30 ++++++++++++++++++++---------- refgenieserver/server_builder.py | 9 ++++++--- 4 files changed, 34 insertions(+), 17 deletions(-) diff --git a/dir2cfg_template.py b/dir2cfg_template.py index ab0746f..21ed3eb 100755 --- a/dir2cfg_template.py +++ b/dir2cfg_template.py @@ -5,8 +5,13 @@ import sys import yaml -from refgenconf import (CFG_ARCHIVE_KEY, CFG_ASSET_PATH_KEY, CFG_FOLDER_KEY, - CFG_GENOMES_KEY, CFG_SERVER_KEY) +from refgenconf import ( + CFG_ARCHIVE_KEY, + CFG_ASSET_PATH_KEY, + CFG_FOLDER_KEY, + CFG_GENOMES_KEY, + CFG_SERVER_KEY, +) class _VersionInHelpParser(argparse.ArgumentParser): diff --git a/refgenieserver/routers/version1.py b/refgenieserver/routers/version1.py index dba78fc..a1a977a 100644 --- a/refgenieserver/routers/version1.py +++ b/refgenieserver/routers/version1.py @@ -6,8 +6,7 @@ from starlette.responses import FileResponse, RedirectResponse from ..const import * -from ..helpers import (get_datapath_for_genome, get_openapi_version, - preprocess_attrs) +from ..helpers import get_datapath_for_genome, get_openapi_version, preprocess_attrs from ..main import _LOGGER, app, rgc, templates router = APIRouter() diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 8bc76ab..828312b 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -11,8 +11,12 @@ from ..const import * from ..data_models import Dict, Genome, List, Tag -from ..helpers import (create_asset_file_path, get_datapath_for_genome, - get_openapi_version, safely_get_example) +from ..helpers import ( + create_asset_file_path, + get_datapath_for_genome, + get_openapi_version, + safely_get_example, +) from ..main import _LOGGER, app, rgc, templates ex_alias = safely_get_example( @@ -243,15 +247,21 @@ async def download_asset_file( Default tag is returned otherwise. """ path = create_asset_file_path(rgc, genome, asset, tag, seek_key) - remote = False - if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None: - remote = True - if os.path.isfile(path): - return FileResponse(path, media_type="application/octet-stream") + remote = ( + True + if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None + else False + ) + if not remote: + if os.path.isfile(path): + return FileResponse(path, media_type="application/octet-stream") + else: + msg = f"The target of the selected seek_key ({seek_key}) is not a file" + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) else: - msg = MSG_404.format(f"asset ({asset})") - _LOGGER.warning(msg) - raise HTTPException(status_code=404, detail=msg) + _LOGGER.info(f"redirecting to URL: '{path}'") + return RedirectResponse(path) @router.get( diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 3862b34..6173289 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -5,9 +5,12 @@ from attmap import PathExAttMap as PXAM from refgenconf import RefGenConf -from refgenconf.exceptions import (ConfigNotCompliantError, - GenomeConfigFormatError, - MissingConfigDataError, RefgenconfError) +from refgenconf.exceptions import ( + ConfigNotCompliantError, + GenomeConfigFormatError, + MissingConfigDataError, + RefgenconfError, +) from refgenconf.helpers import replace_str_in_obj, swap_names_in_tree from ubiquerg import checksum, is_command_callable, parse_registry_path, size From 7beb1c7847b75b439961521c83c2c7c77bc7e9f4 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 19 Mar 2021 09:28:30 -0400 Subject: [PATCH 06/44] DRYer code --- refgenieserver/helpers.py | 19 ++++++++++++++++--- refgenieserver/routers/version3.py | 8 ++------ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 8cf0182..80a03fc 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -155,15 +155,28 @@ def get_datapath_for_genome(rgc, fill_dict, pth_templ="{base}/{genome}/{file_nam assert all( [k in req_keys for k in list(fill_dict.keys())] ), "Only the these keys are allowed in the fill_dict: {}".format(req_keys) - remote = False fill_dict.update({"base": BASE_DIR}) # fill_dict.update({"base": rgc["genome_archive_folder"]}) - if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None: + remote = is_data_remote(rgc) + if remote: fill_dict["base"] = rgc[CFG_REMOTE_URL_BASE_KEY].rstrip("/") - remote = True return pth_templ.format(**fill_dict), remote +def is_data_remote(rgc): + """ + Determine if server genome config defines a remote_url_base key + + :param refgenconf.RefGenConf rgc: server genome config object + :return bool: whether remote data source is configured + """ + return ( + True + if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None + else False + ) + + def purge_nonservable(rgc): """ Remove entries in RefGenConf object that were not processed by the archiver and should not be served diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 828312b..baed459 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -16,6 +16,7 @@ get_datapath_for_genome, get_openapi_version, safely_get_example, + is_data_remote, ) from ..main import _LOGGER, app, rgc, templates @@ -247,12 +248,7 @@ async def download_asset_file( Default tag is returned otherwise. """ path = create_asset_file_path(rgc, genome, asset, tag, seek_key) - remote = ( - True - if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None - else False - ) - if not remote: + if not is_data_remote(rgc): if os.path.isfile(path): return FileResponse(path, media_type="application/octet-stream") else: From 967e50d3195efe5cd283973b326e4f0a9d84e95f Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 19 Mar 2021 09:41:02 -0400 Subject: [PATCH 07/44] add dev refgenconf to staging dockerfile --- requirements/requirements-all.txt | 2 +- staging.Dockerfile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 7026040..1f2ba28 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,5 +3,5 @@ fastapi jinja2 logmuse>=0.2 uvicorn>=0.7.1 -refgenconf>=0.10.0 +# refgenconf>=0.11.0 ubiquerg>=0.6.1 \ No newline at end of file diff --git a/staging.Dockerfile b/staging.Dockerfile index eed51c2..7ab68ee 100644 --- a/staging.Dockerfile +++ b/staging.Dockerfile @@ -2,4 +2,5 @@ FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY . /app +RUN pip install https://github.com/refgenie/refgenconf/archive/dev.zip RUN pip install . From cfd890e5558620cc98be1e74d920bacec3c657c2 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 19 Mar 2021 10:24:46 -0400 Subject: [PATCH 08/44] flip conditional, use operation IDs from refgenconf --- refgenieserver/routers/version3.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index baed459..dbcee0c 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -234,7 +234,7 @@ async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = t @router.get( "/assets/asset_file/{genome}/{asset}/{seek_key}", - operation_id=API_VERSION + "customAssetFile", + operation_id=API_VERSION + API_ID_ASSET_FILE, tags=api_version_tags, ) async def download_asset_file( @@ -248,21 +248,26 @@ async def download_asset_file( Default tag is returned otherwise. """ path = create_asset_file_path(rgc, genome, asset, tag, seek_key) - if not is_data_remote(rgc): + + if is_data_remote(rgc): + _LOGGER.info(f"redirecting to URL: {path}") + return RedirectResponse(path) + else: if os.path.isfile(path): - return FileResponse(path, media_type="application/octet-stream") + return FileResponse( + path, + filename=os.path.basename(path), + media_type="application/octet-stream", + ) else: msg = f"The target of the selected seek_key ({seek_key}) is not a file" _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) - else: - _LOGGER.info(f"redirecting to URL: '{path}'") - return RedirectResponse(path) @router.get( "/assets/asset_file_path/{genome}/{asset}/{seek_key}", - operation_id=API_VERSION + "customAssetFilePath", + operation_id=API_VERSION + API_ID_ASSET_PATH, tags=api_version_tags, response_model=str, ) From cc4e722032204997f61af0348f63cfdd24d14234 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 19 Mar 2021 13:09:22 -0400 Subject: [PATCH 09/44] add newline --- refgenieserver/_version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index d9f7dca..5a93ff5 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1 @@ -__version__ = "0.7.0-dev" \ No newline at end of file +__version__ = "0.7.0-dev" From 8b85f99409f3a98c8f9a1e7af7e4274dbc84f65b Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 19 Mar 2021 16:03:45 -0400 Subject: [PATCH 10/44] multiple changes (#110): - produce asset dir tree - add api endpoint to serve it - link the endpoint in asset splash page --- refgenieserver/helpers.py | 4 +-- refgenieserver/routers/version3.py | 39 +++++++++++++++++++++++++++++- refgenieserver/server_builder.py | 18 +++++++++++++- 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 80a03fc..527439f 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -155,8 +155,8 @@ def get_datapath_for_genome(rgc, fill_dict, pth_templ="{base}/{genome}/{file_nam assert all( [k in req_keys for k in list(fill_dict.keys())] ), "Only the these keys are allowed in the fill_dict: {}".format(req_keys) - fill_dict.update({"base": BASE_DIR}) - # fill_dict.update({"base": rgc["genome_archive_folder"]}) + # fill_dict.update({"base": BASE_DIR}) + fill_dict.update({"base": rgc["genome_archive_folder"]}) remote = is_data_remote(rgc) if remote: fill_dict["base"] = rgc[CFG_REMOTE_URL_BASE_KEY].rstrip("/") diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index dbcee0c..ba9657d 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -251,7 +251,7 @@ async def download_asset_file( if is_data_remote(rgc): _LOGGER.info(f"redirecting to URL: {path}") - return RedirectResponse(path) + return RedirectResponse(url=path, headers={'tontent-type': "application/octet-stream"}) else: if os.path.isfile(path): return FileResponse( @@ -411,6 +411,43 @@ async def download_asset_build_recipe( raise HTTPException(status_code=404, detail=msg) +@router.get( + "/assets/dir_tree/{genome}/{asset}", + operation_id=API_VERSION + API_ID_TREE, + tags=api_version_tags, +) +async def download_asset_directory_tree( + genome: str = g, asset: str = a, tag: Optional[str] = tq +): + """ + Returns a asset directory tree file. + Requires the genome name and the asset name as an input. + + Optionally, 'tag' query parameter can be specified to get a tagged asset archive. + Default tag is returned otherwise. + """ + # TODO: DRY + tag = tag or rgc.get_default_tag( + genome, asset + ) # returns 'default' for nonexistent genome/asset; no need to catch + file_name = TEMPLATE_ASSET_DIR_TREE.format(asset, tag) + path, remote = get_datapath_for_genome( + rgc, dict(genome=genome, file_name=file_name) + ) + if remote: + _LOGGER.info(f"redirecting to URL: '{path}'") + return RedirectResponse(path) + _LOGGER.info(f"serving asset dir tree file: '{path}'") + if os.path.isfile(path): + return FileResponse( + path, filename=file_name, media_type="application/octet-stream" + ) + else: + msg = MSG_404.format(f"asset ({asset})") + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) + + @router.get( "/assets/attrs/{genome}/{asset}", operation_id=API_VERSION + API_ID_ASSET_ATTRS, diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 6173289..8c85708 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -201,10 +201,11 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ) try: _copy_asset_dir(input_file, target_file_core) + _create_asset_dir_tree(target_file_core, asset_name, tag_name) _check_tgz(input_file, target_file) _copy_recipe(input_file, target_dir, asset_name, tag_name) _copy_log(input_file, target_dir, asset_name, tag_name) - # TODO: remove the rest of the try block in the future + # TODO: remove the rest of this try block in the future _check_tgz_legacy( input_file, target_file, @@ -398,6 +399,21 @@ def _copy_asset_dir(input_dir, target_dir): _LOGGER.warning(f"Asset directory not found: {input_dir}") +def _create_asset_dir_tree(asset_dir, asset_name, tag_name): + """ + Create a file tree with contents of the unarchived asset directory + + :param str asset_dir: path to the asset directory to get the contents of + :param str asset_name: name of the asset + :param str tag_name: name of the tag + """ + asset_dir_tree_file_path = os.path.join( + os.path.dirname(asset_dir), TEMPLATE_ASSET_DIR_TREE.format(asset_name, tag_name) + ) + run(f"tree {asset_dir} | tail -n +2 > {asset_dir_tree_file_path}", shell=True) + _LOGGER.info(f"Asset directory tree created: {asset_dir_tree_file_path}") + + def _copy_recipe(input_dir, target_dir, asset_name, tag_name): """ Copy the recipe From aba56c21e2eb4118b744d3e392ba65285a18ba85 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 21 Mar 2021 16:25:40 -0400 Subject: [PATCH 11/44] use find instead of tree for asset dir contents --- refgenieserver/routers/version3.py | 6 +++--- refgenieserver/server_builder.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index ba9657d..ae6ecb1 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -413,10 +413,10 @@ async def download_asset_build_recipe( @router.get( "/assets/dir_tree/{genome}/{asset}", - operation_id=API_VERSION + API_ID_TREE, + operation_id=API_VERSION + API_ID_CONTENTS, tags=api_version_tags, ) -async def download_asset_directory_tree( +async def download_asset_directory_contents( genome: str = g, asset: str = a, tag: Optional[str] = tq ): """ @@ -430,7 +430,7 @@ async def download_asset_directory_tree( tag = tag or rgc.get_default_tag( genome, asset ) # returns 'default' for nonexistent genome/asset; no need to catch - file_name = TEMPLATE_ASSET_DIR_TREE.format(asset, tag) + file_name = TEMPLATE_ASSET_DIR_CONTENTS.format(asset, tag) path, remote = get_datapath_for_genome( rgc, dict(genome=genome, file_name=file_name) ) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 8c85708..0f2cf15 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -201,7 +201,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ) try: _copy_asset_dir(input_file, target_file_core) - _create_asset_dir_tree(target_file_core, asset_name, tag_name) + _get_asset_dir_contents(target_file_core, asset_name, tag_name) _check_tgz(input_file, target_file) _copy_recipe(input_file, target_dir, asset_name, tag_name) _copy_log(input_file, target_dir, asset_name, tag_name) @@ -399,7 +399,7 @@ def _copy_asset_dir(input_dir, target_dir): _LOGGER.warning(f"Asset directory not found: {input_dir}") -def _create_asset_dir_tree(asset_dir, asset_name, tag_name): +def _get_asset_dir_contents(asset_dir, asset_name, tag_name): """ Create a file tree with contents of the unarchived asset directory @@ -407,11 +407,11 @@ def _create_asset_dir_tree(asset_dir, asset_name, tag_name): :param str asset_name: name of the asset :param str tag_name: name of the tag """ - asset_dir_tree_file_path = os.path.join( - os.path.dirname(asset_dir), TEMPLATE_ASSET_DIR_TREE.format(asset_name, tag_name) + asset_dir_contents_file_path = os.path.join( + os.path.dirname(asset_dir), TEMPLATE_ASSET_DIR_CONTENTS.format(asset_name, tag_name) ) - run(f"tree {asset_dir} | tail -n +2 > {asset_dir_tree_file_path}", shell=True) - _LOGGER.info(f"Asset directory tree created: {asset_dir_tree_file_path}") + run(f"cd {asset_dir}; find . -type f > {asset_dir_contents_file_path}", shell=True) + _LOGGER.info(f"Asset directory contents file created: {asset_dir_contents_file_path}") def _copy_recipe(input_dir, target_dir, asset_name, tag_name): From 190fd86014b84710bb6fc0e12ef632c1a172b336 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 21 Mar 2021 17:06:56 -0400 Subject: [PATCH 12/44] cd to dir in legacy archive creation --- refgenieserver/routers/version3.py | 4 +++- refgenieserver/server_builder.py | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index ae6ecb1..62abfc8 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -251,7 +251,9 @@ async def download_asset_file( if is_data_remote(rgc): _LOGGER.info(f"redirecting to URL: {path}") - return RedirectResponse(url=path, headers={'tontent-type': "application/octet-stream"}) + return RedirectResponse( + url=path, headers={"tontent-type": "application/octet-stream"} + ) else: if os.path.isfile(path): return FileResponse( diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 0f2cf15..b00aa59 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -348,12 +348,12 @@ def _check_tgz_legacy(path, output, asset_name, genome_name, alias): swap_names_in_tree(os.path.join(path, asset_name), a, genome_name) # tar gzip the new dir cmd = ( - "cd {p}; tar -cvf - {an} | pigz > {oa}; " + "cd {p}; " + "tar -cvf - {an} | pigz > {oa}; " if is_command_callable("pigz") else "tar -cvzf {oa} {an}; " ) # remove the new dir - cmd += "rm -r {p}/{an}" + cmd += "rm -r {p}/{an}; " command = cmd.format(p=path, oa=aliased_output, an=asset_name) _LOGGER.debug(f"command: {command}") run(command, shell=True) @@ -408,10 +408,13 @@ def _get_asset_dir_contents(asset_dir, asset_name, tag_name): :param str tag_name: name of the tag """ asset_dir_contents_file_path = os.path.join( - os.path.dirname(asset_dir), TEMPLATE_ASSET_DIR_CONTENTS.format(asset_name, tag_name) + os.path.dirname(asset_dir), + TEMPLATE_ASSET_DIR_CONTENTS.format(asset_name, tag_name), ) run(f"cd {asset_dir}; find . -type f > {asset_dir_contents_file_path}", shell=True) - _LOGGER.info(f"Asset directory contents file created: {asset_dir_contents_file_path}") + _LOGGER.info( + f"Asset directory contents file created: {asset_dir_contents_file_path}" + ) def _copy_recipe(input_dir, target_dir, asset_name, tag_name): From 16d289418f4e2269000072ae39c4dec9abd9d94a Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 21 Mar 2021 17:08:53 -0400 Subject: [PATCH 13/44] cd to dir in legacy archive creation --- refgenieserver/server_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index b00aa59..e98ee93 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -347,8 +347,8 @@ def _check_tgz_legacy(path, output, asset_name, genome_name, alias): run(command, shell=True) swap_names_in_tree(os.path.join(path, asset_name), a, genome_name) # tar gzip the new dir - cmd = ( - "cd {p}; " + "tar -cvf - {an} | pigz > {oa}; " + cmd = "cd {p}; " + ( + "tar -cvf - {an} | pigz > {oa}; " if is_command_callable("pigz") else "tar -cvzf {oa} {an}; " ) From 90070e671ff8d18bb940f947397d4038d423b8b8 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 21 Mar 2021 17:14:13 -0400 Subject: [PATCH 14/44] update endpoint path --- refgenieserver/routers/version3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 62abfc8..a11ceed 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -414,7 +414,7 @@ async def download_asset_build_recipe( @router.get( - "/assets/dir_tree/{genome}/{asset}", + "/assets/dir_contents/{genome}/{asset}", operation_id=API_VERSION + API_ID_CONTENTS, tags=api_version_tags, ) From 779a9257cf63f387019542859be59aec28d02200 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Sun, 21 Mar 2021 18:09:05 -0400 Subject: [PATCH 15/44] systematically keep copyright year up-to-date --- refgenieserver/routers/version3.py | 5 +++++ refgenieserver/templates/v3/base.html | 2 +- refgenieserver/templates/v3/footer.html | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index a11ceed..40f9c75 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -9,6 +9,7 @@ from ubiquerg import parse_registry_path from yacman import IK, UndefinedAliasError +from datetime import date from ..const import * from ..data_models import Dict, Genome, List, Tag from ..helpers import ( @@ -77,6 +78,7 @@ ) api_version_tags = [API3_ID] +current_year = date.today().year @router.get("/", tags=api_version_tags) @@ -93,6 +95,7 @@ async def index(request: Request): "rgc": rgc, "openapi_version": get_openapi_version(app), "columns": ["aliases", "digest", "description", "fasta asset", "# assets"], + "current_year": current_year, } return templates.TemplateResponse("v3/index.html", dict(templ_vars, **ALL_VERSIONS)) @@ -107,6 +110,7 @@ async def genome_splash_page(request: Request, genome: str = g): "genome": genome, "genome_dict": rgc[CFG_GENOMES_KEY][genome], "request": request, + "current_year": current_year, "columns": [ "download", "asset name:tag", @@ -144,6 +148,7 @@ async def asset_splash_page( "rgc": rgc, "prp": parse_registry_path, "links_dict": links_dict, + "current_year": current_year, "openapi_version": get_openapi_version(app), } _LOGGER.debug(f"merged vars: {dict(templ_vars, **ALL_VERSIONS)}") diff --git a/refgenieserver/templates/v3/base.html b/refgenieserver/templates/v3/base.html index 0a3f72d..1671aa9 100644 --- a/refgenieserver/templates/v3/base.html +++ b/refgenieserver/templates/v3/base.html @@ -23,7 +23,7 @@
- {% include "footer.html" %} + {% include "v3/footer.html" %}
\ No newline at end of file diff --git a/refgenieserver/templates/v3/footer.html b/refgenieserver/templates/v3/footer.html index fd50fac..7f4b52b 100644 --- a/refgenieserver/templates/v3/footer.html +++ b/refgenieserver/templates/v3/footer.html @@ -26,6 +26,6 @@ \ No newline at end of file From 612d1eb97687906c5913ac96290d0ac92f632b3b Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 22 Mar 2021 16:39:54 -0400 Subject: [PATCH 16/44] implement custom remote classes support; #100 --- refgenieserver/helpers.py | 41 +++++++++++++---- refgenieserver/routers/version1.py | 1 + refgenieserver/routers/version2.py | 3 ++ refgenieserver/routers/version3.py | 74 ++++++++++++------------------ 4 files changed, 66 insertions(+), 53 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 527439f..2e6f5d3 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -137,7 +137,9 @@ def get_openapi_version(app): return "3.0.2" -def get_datapath_for_genome(rgc, fill_dict, pth_templ="{base}/{genome}/{file_name}"): +def get_datapath_for_genome( + rgc, fill_dict, pth_templ="{base}/{genome}/{file_name}", remote_key=None +): """ Get the path to the data file to serve. @@ -154,25 +156,46 @@ def get_datapath_for_genome(rgc, fill_dict, pth_templ="{base}/{genome}/{file_nam req_keys = [i[1] for i in Formatter().parse(pth_templ) if i[1] is not None] assert all( [k in req_keys for k in list(fill_dict.keys())] - ), "Only the these keys are allowed in the fill_dict: {}".format(req_keys) - # fill_dict.update({"base": BASE_DIR}) - fill_dict.update({"base": rgc["genome_archive_folder"]}) + ), f"Only the these keys are allowed in the fill_dict: {req_keys}" + fill_dict.update({"base": BASE_DIR}) + # fill_dict.update({"base": rgc["genome_archive_folder"]}) remote = is_data_remote(rgc) if remote: - fill_dict["base"] = rgc[CFG_REMOTE_URL_BASE_KEY].rstrip("/") + if remote_key is None: + raise ValueError( + f"'remotes' key found in config; the 'remote_key' argument must " + f"be one of: {list(rgc['remotes'].keys())} " + ) + if remote_key not in rgc["remotes"]: + raise KeyError( + f"In remotes mapping the '{remote_key}' not found. " + f"Can't determine a data path prefix identified by this key." + ) + # at this point we know that the 'remotes' mapping has the 'remote_key' key + # and the value is a dict with 'prefix' key defined. + fill_dict["base"] = rgc["remotes"][remote_key]["prefix"].rstrip("/") return pth_templ.format(**fill_dict), remote def is_data_remote(rgc): """ - Determine if server genome config defines a remote_url_base key + Determine if server genome config defines a 'remotes' key, 'http is one of them and + additionally assert the correct structure -- 'prefix' key defined. :param refgenconf.RefGenConf rgc: server genome config object :return bool: whether remote data source is configured """ return ( True - if CFG_REMOTE_URL_BASE_KEY in rgc and rgc[CFG_REMOTE_URL_BASE_KEY] is not None + if "remotes" in rgc + and isinstance(rgc["remotes"], dict) + and "http" in rgc["remotes"] + and all( + [ + "prefix" in r and isinstance(r["prefix"], str) + for r in rgc["remotes"].values() + ] + ) else False ) @@ -221,7 +244,7 @@ def safely_get_example(rgc, entity, rgc_method, default, **kwargs): return default -def create_asset_file_path(rgc, genome, asset, tag, seek_key): +def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http"): """ Construct a path to an unarchived asset file @@ -246,7 +269,7 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key): seek_key_target = tag_dict[CFG_SEEK_KEYS_KEY][seek_key] file_name = f"{asset}__{tag}/{seek_key_target}" path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name) + rgc, dict(genome=genome, file_name=file_name), remote_key=remote_key ) _LOGGER.info(f"serving asset file path: {path}") return path diff --git a/refgenieserver/routers/version1.py b/refgenieserver/routers/version1.py index a1a977a..bc123f3 100644 --- a/refgenieserver/routers/version1.py +++ b/refgenieserver/routers/version1.py @@ -68,6 +68,7 @@ async def download_asset(genome: str, asset: str, tag: str = None): genome=rgc.get_genome_alias(digest=genome, fallback=True), file_name=file_name, ), + remote_key="http", ) _LOGGER.info("file source: {}".format(path)) if remote: diff --git a/refgenieserver/routers/version2.py b/refgenieserver/routers/version2.py index afcea4f..ef69abf 100644 --- a/refgenieserver/routers/version2.py +++ b/refgenieserver/routers/version2.py @@ -100,6 +100,7 @@ async def download_asset(genome: str, asset: str, tag: str = None): genome=rgc.get_genome_alias(digest=genome, fallback=True), file_name=file_name, ), + remote_key="http", ) _LOGGER.info("file source: {}".format(path)) if remote: @@ -189,6 +190,7 @@ async def download_asset_build_log(genome: str, asset: str, tag: str = None): genome=rgc.get_genome_alias(digest=genome, fallback=True), file_name=file_name, ), + remote_key="http", ) if remote: _LOGGER.info("redirecting to URL: '{}'".format(path)) @@ -223,6 +225,7 @@ async def download_asset_build_recipe(genome: str, asset: str, tag: str = None): genome=rgc.get_genome_alias(digest=genome, fallback=True), file_name=file_name, ), + remote_key="http", ) if remote: _LOGGER.info("redirecting to URL: '{}'".format(path)) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 40f9c75..7dd344d 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -1,4 +1,6 @@ from copy import copy +from datetime import date +from enum import Enum from typing import Optional from fastapi import APIRouter, HTTPException, Path, Query @@ -9,18 +11,19 @@ from ubiquerg import parse_registry_path from yacman import IK, UndefinedAliasError -from datetime import date from ..const import * from ..data_models import Dict, Genome, List, Tag from ..helpers import ( create_asset_file_path, get_datapath_for_genome, get_openapi_version, - safely_get_example, is_data_remote, + safely_get_example, ) from ..main import _LOGGER, app, rgc, templates +RemoteClassEnum = Enum("RemoteClassEnum", {r: r for r in rgc["remotes"]}) + ex_alias = safely_get_example( rgc, "genome digest", @@ -100,6 +103,16 @@ async def index(request: Request): return templates.TemplateResponse("v3/index.html", dict(templ_vars, **ALL_VERSIONS)) +@router.get( + "/remotes/dict", tags=api_version_tags, response_model=Dict[str, Dict[str, str]] +) +async def genome_splash_page(): + """ + Returns the remotes section of the server configuration file + """ + return rgc["remotes"] if "remotes" in rgc else None + + @router.get("/genomes/splash/{genome}", tags=api_version_tags) async def genome_splash_page(request: Request, genome: str = g): """ @@ -220,7 +233,7 @@ async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = t ) # returns 'default' for nonexistent genome/asset; no need to catch file_name = f"{asset}__{tag}.tgz" path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name) + rgc, dict(genome=genome, file_name=file_name), remote_key="http" ) _LOGGER.info(f"file source: {path}") if remote: @@ -238,48 +251,19 @@ async def download_asset(genome: str = g, asset: str = a, tag: Optional[str] = t @router.get( - "/assets/asset_file/{genome}/{asset}/{seek_key}", - operation_id=API_VERSION + API_ID_ASSET_FILE, - tags=api_version_tags, -) -async def download_asset_file( - genome: str = g, asset: str = a, seek_key: str = s, tag: Optional[str] = tq -): - """ - Returns the unarchived asset file. - Requires a genome name, an asset name and a seek_key name as an input. - - Optionally, 'tag' query parameter can be specified to get a tagged asset file. - Default tag is returned otherwise. - """ - path = create_asset_file_path(rgc, genome, asset, tag, seek_key) - - if is_data_remote(rgc): - _LOGGER.info(f"redirecting to URL: {path}") - return RedirectResponse( - url=path, headers={"tontent-type": "application/octet-stream"} - ) - else: - if os.path.isfile(path): - return FileResponse( - path, - filename=os.path.basename(path), - media_type="application/octet-stream", - ) - else: - msg = f"The target of the selected seek_key ({seek_key}) is not a file" - _LOGGER.warning(msg) - raise HTTPException(status_code=404, detail=msg) - - -@router.get( - "/assets/asset_file_path/{genome}/{asset}/{seek_key}", + "/assets/file_path/{genome}/{asset}/{seek_key}", operation_id=API_VERSION + API_ID_ASSET_PATH, tags=api_version_tags, response_model=str, ) async def get_asset_file_path( - genome: str = g, asset: str = a, seek_key: str = s, tag: Optional[str] = tq + genome: str = g, + asset: str = a, + seek_key: str = s, + tag: Optional[str] = tq, + remoteClass: RemoteClassEnum = Query( + "html", description="Remote data provider class" + ), ): """ Returns a path to the unarchived asset file. @@ -288,7 +272,9 @@ async def get_asset_file_path( Optionally, 'tag' query parameter can be specified to get a tagged asset file path. Default tag is returned otherwise. """ - return create_asset_file_path(rgc, genome, asset, tag, seek_key) + return create_asset_file_path( + rgc, genome, asset, tag, seek_key, remote_key=remoteClass + ) @router.get( @@ -365,7 +351,7 @@ async def download_asset_build_log( ) # returns 'default' for nonexistent genome/asset; no need to catch file_name = TEMPLATE_LOG.format(asset, tag) path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name) + rgc, dict(genome=genome, file_name=file_name), remote_key="http" ) if remote: _LOGGER.info(f"redirecting to URL: '{path}'") @@ -400,7 +386,7 @@ async def download_asset_build_recipe( ) # returns 'default' for nonexistent genome/asset; no need to catch file_name = TEMPLATE_RECIPE_JSON.format(asset, tag) path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name) + rgc, dict(genome=genome, file_name=file_name), remote_key="http" ) if remote: _LOGGER.info(f"redirecting to URL: '{path}'") @@ -439,7 +425,7 @@ async def download_asset_directory_contents( ) # returns 'default' for nonexistent genome/asset; no need to catch file_name = TEMPLATE_ASSET_DIR_CONTENTS.format(asset, tag) path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name) + rgc, dict(genome=genome, file_name=file_name), remote_key="http" ) if remote: _LOGGER.info(f"redirecting to URL: '{path}'") From 0f8a7c791f9fa4d088a2a77b7878394ba7e333aa Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 22 Mar 2021 16:50:10 -0400 Subject: [PATCH 17/44] get the value of the custom data model --- refgenieserver/routers/version3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 7dd344d..3719852 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -273,7 +273,7 @@ async def get_asset_file_path( Default tag is returned otherwise. """ return create_asset_file_path( - rgc, genome, asset, tag, seek_key, remote_key=remoteClass + rgc, genome, asset, tag, seek_key, remote_key=remoteClass.value ) From fe5ad03fc687ec9bac9f43770314bf11286162e3 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 22 Mar 2021 16:58:22 -0400 Subject: [PATCH 18/44] update remotes endpoint name --- refgenieserver/routers/version3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 3719852..47c583c 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -106,7 +106,7 @@ async def index(request: Request): @router.get( "/remotes/dict", tags=api_version_tags, response_model=Dict[str, Dict[str, str]] ) -async def genome_splash_page(): +async def get_remotes_dict(): """ Returns the remotes section of the server configuration file """ From caccb2cd4905e2c057f8233261ed2d44e9307209 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 22 Mar 2021 17:02:05 -0400 Subject: [PATCH 19/44] update default remoteClass --- refgenieserver/routers/version3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 47c583c..60d8b55 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -262,7 +262,7 @@ async def get_asset_file_path( seek_key: str = s, tag: Optional[str] = tq, remoteClass: RemoteClassEnum = Query( - "html", description="Remote data provider class" + "http", description="Remote data provider class" ), ): """ From 0c7df1b5c0090457e27b8b42a873c0f04e74a848 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 22 Mar 2021 17:06:14 -0400 Subject: [PATCH 20/44] use custom Enum for default remoteClass value --- refgenieserver/routers/version3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 60d8b55..e68d8f5 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -262,7 +262,7 @@ async def get_asset_file_path( seek_key: str = s, tag: Optional[str] = tq, remoteClass: RemoteClassEnum = Query( - "http", description="Remote data provider class" + RemoteClassEnum.http, description="Remote data provider class" ), ): """ From 83fcd14beb77cfae2c08f91baf7bdf00cefa75a0 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 22 Mar 2021 17:32:55 -0400 Subject: [PATCH 21/44] document /assets/file_path endpoint --- refgenieserver/routers/version3.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index e68d8f5..595b523 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -269,8 +269,10 @@ async def get_asset_file_path( Returns a path to the unarchived asset file. Requires a genome name, an asset name and a seek_key name as an input. - Optionally, 'tag' query parameter can be specified to get a tagged asset file path. - Default tag is returned otherwise. + Optionally, query parameters can be specified: + + - **tag**: to get a tagged asset file path. Default tag is returned if not specified. + - **remoteClass**: to set a remote data provider class. 'http' is used if not specified. """ return create_asset_file_path( rgc, genome, asset, tag, seek_key, remote_key=remoteClass.value From 12867c20c786142afca143ba78e80b55b7e70473 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 25 Mar 2021 10:09:18 -0400 Subject: [PATCH 22/44] account for local data storage; fix #111 --- refgenieserver/helpers.py | 4 ++-- refgenieserver/routers/version3.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 2e6f5d3..806c517 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -110,8 +110,8 @@ def preprocess_attrs(attrs): """ Based on the CHANGED_KEYS mapping (new_key:old_key), rename the keys in the provided one - :param yacman.yacman.YacAttMap attrs: mapping to process - :return yacman.yacman.YacAttMap: mapping with renamed key names + :param yacman.YacAttMap attrs: mapping to process + :return yacman.YacAttMap: mapping with renamed key names """ from copy import deepcopy diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 595b523..0bd0637 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -22,7 +22,10 @@ ) from ..main import _LOGGER, app, rgc, templates -RemoteClassEnum = Enum("RemoteClassEnum", {r: r for r in rgc["remotes"]}) +RemoteClassEnum = Enum( + "RemoteClassEnum", + {r: r for r in rgc["remotes"]} if is_data_remote(rgc) else {"http": "http"}, +) ex_alias = safely_get_example( rgc, @@ -262,7 +265,7 @@ async def get_asset_file_path( seek_key: str = s, tag: Optional[str] = tq, remoteClass: RemoteClassEnum = Query( - RemoteClassEnum.http, description="Remote data provider class" + "http", description="Remote data provider class" ), ): """ @@ -274,6 +277,11 @@ async def get_asset_file_path( - **tag**: to get a tagged asset file path. Default tag is returned if not specified. - **remoteClass**: to set a remote data provider class. 'http' is used if not specified. """ + if not is_data_remote(rgc): + _LOGGER.info( + "No 'remotes' defined in the server genome configuration file. " + "Serving a local asset file path." + ) return create_asset_file_path( rgc, genome, asset, tag, seek_key, remote_key=remoteClass.value ) From 44f560263b30f456e1d4ab68f6410433c13e4ceb Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 25 Mar 2021 11:21:10 -0400 Subject: [PATCH 23/44] return plain text instead of JSON for str responses; #67 --- refgenieserver/routers/version3.py | 31 +++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 0bd0637..b7f8ab6 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -3,7 +3,7 @@ from enum import Enum from typing import Optional -from fastapi import APIRouter, HTTPException, Path, Query +from fastapi import APIRouter, HTTPException, Path, Query, Response from refgenconf.exceptions import RefgenconfError from refgenconf.refgenconf import map_paths_by_id from starlette.requests import Request @@ -282,8 +282,11 @@ async def get_asset_file_path( "No 'remotes' defined in the server genome configuration file. " "Serving a local asset file path." ) - return create_asset_file_path( - rgc, genome, asset, tag, seek_key, remote_key=remoteClass.value + return Response( + content=create_asset_file_path( + rgc, genome, asset, tag, seek_key, remote_key=remoteClass.value + ), + media_type="text/plain", ) @@ -297,7 +300,7 @@ async def get_asset_default_tag(genome: str = g, asset: str = a): """ Returns the default tag name. Requires genome name and asset name as an input. """ - return rgc.get_default_tag(genome, asset) + return Response(content=rgc.get_default_tag(genome, asset), media_type="text/plain") @router.get( @@ -312,9 +315,12 @@ async def get_asset_digest(genome: str = g, asset: str = a, tag: Optional[str] = """ tag = tag or DEFAULT_TAG try: - return rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ - tag - ][CFG_ASSET_CHECKSUM_KEY] + return Response( + content=rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][ + CFG_ASSET_TAGS_KEY + ][tag][CFG_ASSET_CHECKSUM_KEY], + media_type="text/plain", + ) except KeyError: msg = MSG_404.format(f"genome/asset:tag combination ({genome}/{asset}:{tag})") _LOGGER.warning(msg) @@ -333,9 +339,12 @@ async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] """ tag = tag or DEFAULT_TAG try: - return rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][CFG_ASSET_TAGS_KEY][ - tag - ][CFG_ARCHIVE_CHECKSUM_KEY] + return Response( + content=rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset][ + CFG_ASSET_TAGS_KEY + ][tag][CFG_ARCHIVE_CHECKSUM_KEY], + media_type="text/plain", + ) except KeyError: msg = MSG_404.format(f"genome/asset:tag combination ({genome}/{asset}:{tag})") _LOGGER.warning(msg) @@ -535,7 +544,7 @@ async def get_genome_alias_digest(alias: str = al): try: digest = rgc.get_genome_alias_digest(alias=alias) _LOGGER.info(f"digest returned for '{alias}': {digest}") - return digest + return Response(content=digest, media_type="text/plain") except (KeyError, UndefinedAliasError): msg = MSG_404.format(f"alias ({alias})") _LOGGER.warning(msg) From 062d75f76d76ad3115ba08c5b1ed71aee6467ab6 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 25 Mar 2021 11:46:34 -0400 Subject: [PATCH 24/44] copy aliases from template cfg; #112 --- refgenieserver/server_builder.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index e98ee93..a1405dc 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -133,7 +133,11 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): if genomes_desc is None or genome not in descs else descs[genome] ) - genome_attrs = {CFG_GENOME_DESC_KEY: genome_desc} + genome_aliases = rgc[CFG_GENOMES_KEY][genome].setdefault(CFG_ALIASES_KEY, []) + genome_attrs = { + CFG_GENOME_DESC_KEY: genome_desc, + CFG_ALIASES_KEY: genome_aliases, + } with rgc_server as r: r[CFG_GENOMES_KEY].setdefault(genome, PXAM()) r[CFG_GENOMES_KEY][genome].update(genome_attrs) From b9712251c884056454a879f96773034546951d4f Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 25 Mar 2021 13:28:47 -0400 Subject: [PATCH 25/44] update text response class in postman --- refgenieserver.postman_collection.json | 31 +++++++++++++------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/refgenieserver.postman_collection.json b/refgenieserver.postman_collection.json index 11f7a7a..68d1fa7 100644 --- a/refgenieserver.postman_collection.json +++ b/refgenieserver.postman_collection.json @@ -93,11 +93,12 @@ "pm.test(\"Status code is 200\", function () {", " pm.response.to.have.status(200);", "});", - "var jsonData = pm.response.json();", + "var textData = pm.response.text();", "pm.test(\"Test data type of the response\", () => {", - " pm.expect(jsonData).to.be.a(\"string\");", + " pm.expect(textData).to.be.a(\"string\");", "});", - "pm.variables.set(\"genomeDigest\", jsonData)" + "pm.variables.set(\"genomeDigest\", textData)", + "" ], "type": "text/javascript" } @@ -308,11 +309,11 @@ "pm.test(\"Status code is 200\", function () {", " pm.response.to.have.status(200);", "});", - "var jsonData = pm.response.json();", + "var textData = pm.response.text();", "pm.test(\"Test data type of the response\", () => {", - " pm.expect(jsonData).to.be.a(\"string\");", + " pm.expect(textData).to.be.a(\"string\");", "});", - "pm.variables.set(\"defaultTag\", jsonData)" + "pm.variables.set(\"defaultTag\", textData)" ], "type": "text/javascript" } @@ -347,11 +348,11 @@ "pm.test(\"Status code is 200\", function () {", " pm.response.to.have.status(200);", "});", - "var jsonData = pm.response.json();", + "var textData = pm.response.text();", "pm.test(\"Test data type of the response\", () => {", - " pm.expect(jsonData).to.be.a(\"string\");", + " pm.expect(textData).to.be.a(\"string\");", "});", - "pm.variables.set(\"assetDigest\", jsonData)" + "pm.variables.set(\"assetDigest\", textData)" ], "type": "text/javascript" } @@ -392,11 +393,11 @@ "pm.test(\"Status code is 200\", function () {", " pm.response.to.have.status(200);", "});", - "var jsonData = pm.response.json();", + "var textData = pm.response.text();", "pm.test(\"Test data type of the response\", () => {", - " pm.expect(jsonData).to.be.a(\"string\");", + " pm.expect(textData).to.be.a(\"string\");", "});", - "pm.variables.set(\"archiveDigest\", jsonData)" + "pm.variables.set(\"archiveDigest\", textData)" ], "type": "text/javascript" } @@ -437,11 +438,11 @@ "pm.test(\"Status code is 200\", function () {", " pm.response.to.have.status(200);", "});", - "var jsonData = pm.response.json();", + "var textData = pm.response.text();", "pm.test(\"Test data type of the response\", () => {", - " pm.expect(jsonData).to.be.a(\"string\");", + " pm.expect(textData).to.be.a(\"string\");", "});", - "pm.variables.set(\"assetDigest\", jsonData)" + "pm.variables.set(\"assetDigest\", textData)" ], "type": "text/javascript" } From 148150e4b50640754b5b3a2179ddaca4ca803234 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 25 Mar 2021 14:59:39 -0400 Subject: [PATCH 26/44] add remotes postman tests --- refgenieserver.postman_collection.json | 102 ++++++++++++++++++++++++- 1 file changed, 101 insertions(+), 1 deletion(-) diff --git a/refgenieserver.postman_collection.json b/refgenieserver.postman_collection.json index 68d1fa7..d2d0c0e 100644 --- a/refgenieserver.postman_collection.json +++ b/refgenieserver.postman_collection.json @@ -674,7 +674,7 @@ " pm.expect(jsonData).to.be.a(\"object\");", "});", "", - "pm.test(\"Test data includes genomeDigest\", () => {", + "pm.test(\"Test data includes: \" + genomeDigest, () => {", " pm.expect(jsonData).haveOwnProperty(genomeDigest);", "});", "", @@ -703,6 +703,106 @@ } }, "response": [] + }, + { + "name": "get_remotes_dict", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "var jsonData = pm.response.json();", + "pm.test(\"Test data type of the response\", () => {", + " pm.expect(jsonData).to.be.a(\"object\");", + "});", + "", + "var remoteClasses = Object.keys(jsonData);", + "", + "pm.variables.set(\"retemoteClasses\", remoteClasses);", + "pm.variables.set(\"firstRetemoteClass\", remoteClasses[1]);", + "", + "console.log(\"Determined remote classes: \", remoteClasses);", + "", + "for (const remoteClass of remoteClasses){", + " pm.test(\"Test data value content of: \" + remoteClass, () => {", + " pm.expect(jsonData[remoteClass]).haveOwnProperty(\"prefix\");", + " pm.expect(jsonData[remoteClass][\"prefix\"]).to.be.a(\"string\");", + " });", + "}", + "", + "", + "", + "" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "http://{{urlBase}}/remotes/dict", + "protocol": "http", + "host": [ + "{{urlBase}}" + ], + "path": [ + "remotes", + "dict" + ] + } + }, + "response": [] + }, + { + "name": "get_asset_file_path", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "var textData = pm.response.text();", + "pm.test(\"Test data type of the response\", () => {", + " pm.expect(textData).to.be.a(\"string\");", + "});", + "" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "http://{{urlBase}}/assets/file_path/{{genomeDigest}}/{{asset}}/{{asset}}?remoteClass={{firstRetemoteClass}}", + "protocol": "http", + "host": [ + "{{urlBase}}" + ], + "path": [ + "assets", + "file_path", + "{{genomeDigest}}", + "{{asset}}", + "{{asset}}" + ], + "query": [ + { + "key": "remoteClass", + "value": "{{firstRetemoteClass}}" + } + ] + } + }, + "response": [] } ] } \ No newline at end of file From ef66c987bc8f6c9d70201e5b8da317d720bb25a5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 25 Mar 2021 16:15:15 -0400 Subject: [PATCH 27/44] reuse code in file serving endpoints --- .pre-commit-config.yaml | 20 ++++++ README.md | 15 +++-- dir2cfg_template.py | 101 ----------------------------- refgenieserver/helpers.py | 32 +++++++++ refgenieserver/routers/version3.py | 77 ++++++++-------------- 5 files changed, 87 insertions(+), 158 deletions(-) create mode 100644 .pre-commit-config.yaml delete mode 100755 dir2cfg_template.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..ab5489e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,20 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: trailing-whitespace + + - repo: https://github.com/PyCQA/isort + rev: 5.7.0 + hooks: + - id: isort + args: ["--profile", "black"] + + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black diff --git a/README.md b/README.md index a172559..86dd6c8 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,11 @@ -[![Build Status](https://travis-ci.org/databio/refgenieserver.svg?branch=master)](https://travis-ci.org/databio/refgenieserver) +[![Deploy to Dockerhub on release](https://github.com/refgenie/refgenieserver/actions/workflows/deploy_release_software.yml/badge.svg)](https://github.com/refgenie/refgenieserver/actions/workflows/deploy_release_software.yml) +[![Deploy to Amazon ECS - software-staging](https://github.com/refgenie/refgenieserver/actions/workflows/deploy_staging_software.yml/badge.svg)](https://github.com/refgenie/refgenieserver/actions/workflows/deploy_staging_software.yml) + +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) # refgenieserver -This folder contains code for an API to provide reference genomes. `refgenieserver` can do 2 things: `archive` an existing refgenie folder, and then `serve` it. +This folder contains code for an API to provide reference genomes. `refgenieserver` can do 2 things: `archive` an existing refgenie folder, and then `serve` it. ## How to `serve` @@ -27,7 +30,7 @@ Better, though, is to use the container. Mount a directory of files to serve at ``` docker run --rm -p 80:80 --name refgenieservercon \ -v $(pwd)/files:/genomes \ - refgenieserverim refgenieserver serve -c refgenie.yaml + refgenieserverim refgenieserver serve -c refgenie.yaml ``` ### Running container for production: @@ -37,7 +40,7 @@ Run the container from the image you just built: docker run --rm -d -p 80:80 \ -v /path/to/genomes_archive:/genomes \ --name refgenieservercon \ - refgenieserverim refgenieserver serve -c /genomes/genome_config.yaml + refgenieserverim refgenieserver serve -c /genomes/genome_config.yaml ``` Make sure the `genome_config.yaml` filename matches what you've named your configuration file! We use `-d` to detach so it's in background. You shouldn't need to mount the app (`-v /path/to/refgenieserver:/app`) because in this case we're running it directly. Terminate container when finished: @@ -88,7 +91,7 @@ First, make sure the config has a `genome_archive_folder` key that points to the Secondly, if you wish to store the refgenieserver configuration file separately from the `genome_archive_folder`, specify a `genome_archive_config` key. The path that this key points to will be considered relative to the refgenie configuration file, unless it's absolute. -Then run: +Then run: ``` refgenieserver archive -c CONFIG ```` @@ -96,7 +99,7 @@ It just requires a `-c` argument or `$REFGENIE` environment variable. This command will: - create the `genome_archive` directory and structure that can be used to serve the assets -- create a server config file in that directory, which includes a couple of extra asset attributes, like `archive_digest` and `archive_size`. +- create a server config file in that directory, which includes a couple of extra asset attributes, like `archive_digest` and `archive_size`. In case you already have some of the assets archived and just want to add a new one, use: diff --git a/dir2cfg_template.py b/dir2cfg_template.py deleted file mode 100755 index 21ed3eb..0000000 --- a/dir2cfg_template.py +++ /dev/null @@ -1,101 +0,0 @@ -V = 0.1 -SERVER_CFG_NAME = "refgenieserver_config.yaml" -import argparse -import os -import sys - -import yaml -from refgenconf import ( - CFG_ARCHIVE_KEY, - CFG_ASSET_PATH_KEY, - CFG_FOLDER_KEY, - CFG_GENOMES_KEY, - CFG_SERVER_KEY, -) - - -class _VersionInHelpParser(argparse.ArgumentParser): - def format_help(self): - """ Add version information to help text. """ - return ( - "version: {}\n".format(V) + super(_VersionInHelpParser, self).format_help() - ) - - -def build_dir2yaml_parser(): - """ - Building argument parser - - :return argparse.ArgumentParser - """ - banner = ( - "%(prog)s builds a refgenieserver config template for the directory structure." - " Keep in mind that the produced config will require some adjustments." - ) - additional_description = "\nhttps://github.com/databio/refgenieserver" - - parser = _VersionInHelpParser(description=banner, epilog=additional_description) - - parser.add_argument( - "-V", "--version", action="version", version="%(prog)s {v}".format(v=V) - ) - parser.add_argument( - "-p", - "--path", - dest="path", - help="A path to the directory that the YAML should be build for. If not provided, current working directory " - "will be used ({})".format(os.getcwd()), - default=None, - ) - return parser - - -def dir_as_dict(path): - """ - creates a dict out of a directory - inspired by: https://gist.github.com/blaketmiller/ee85ec1b5ddf038aa923 - - :param str path: path to dir - :return: - """ - directory = {} - for dirname, dirnames, filenames in os.walk(path): - dn = os.path.basename(dirname) - directory[dn] = {} - if dirnames: - for d in dirnames: - directory[dn].update(dir_as_dict(os.path.join(path, d))) - else: - directory[dn][CFG_ASSET_PATH_KEY] = dn - return directory - - -def main(): - parser = build_dir2yaml_parser() - args = parser.parse_args() - p = os.path.abspath(args.path) if args.path is not None else os.getcwd() - server_path = os.path.join(p, SERVER_CFG_NAME) - try: - with open(server_path, "w") as f: - try: - rgc = { - CFG_FOLDER_KEY: None, - CFG_SERVER_KEY: "http://www.refgenomes.databio.org", - CFG_ARCHIVE_KEY: None, - CFG_GENOMES_KEY: None, - } - rgc[CFG_GENOMES_KEY] = dir_as_dict(p)[os.path.basename(p)] - yaml.dump(rgc, f) - print("Server config written to: {}".format(server_path)) - except Exception as e: - print("Encountered an error: '{}: {}'".format(e.__class__.__name__, e)) - except Exception as e: - print("Encountered an error: '{}: {}'".format(e.__class__.__name__, e)) - - -if __name__ == "__main__": - try: - sys.exit(main()) - except KeyboardInterrupt: - print("Program canceled by user") - sys.exit(1) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 806c517..aa63bdb 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -2,6 +2,7 @@ from string import Formatter from fastapi import HTTPException +from fastapi.responses import FileResponse, RedirectResponse from refgenconf.exceptions import RefgenconfError from ubiquerg import VersionInHelpParser from yacman import get_first_env_var @@ -273,3 +274,34 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") ) _LOGGER.info(f"serving asset file path: {path}") return path + + +def serve_file_for_asset(rgc, genome, asset, tag, template): + """ + Serve a file, like log or asset dir contents for an asset + + :param str genome: genome name + :param str asset: asset name + :param str tag: tag name + :param ste template: file name template with place for asset and tag names, + e.g. 'build_log_{}__{}.md' + """ + tag = tag or rgc.get_default_tag( + genome, asset + ) # returns 'default' for nonexistent genome/asset; no need to catch + file_name = template.format(asset, tag) + path, remote = get_datapath_for_genome( + rgc, dict(genome=genome, file_name=file_name), remote_key="http" + ) + if remote: + _LOGGER.info(f"redirecting to URL: '{path}'") + return RedirectResponse(path) + _LOGGER.info(f"serving file: '{path}'") + if os.path.isfile(path): + return FileResponse( + path, filename=file_name, media_type="application/octet-stream" + ) + else: + msg = MSG_404.format(f"asset ({genome}/{asset}:{tag})") + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index b7f8ab6..fb45ecc 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -1,6 +1,7 @@ from copy import copy from datetime import date from enum import Enum +from json import load from typing import Optional from fastapi import APIRouter, HTTPException, Path, Query, Response @@ -19,6 +20,7 @@ get_openapi_version, is_data_remote, safely_get_example, + serve_file_for_asset, ) from ..main import _LOGGER, app, rgc, templates @@ -352,15 +354,15 @@ async def get_archive_digest(genome: str = g, asset: str = a, tag: Optional[str] @router.get( - "/assets/log/{genome}/{asset}", - operation_id=API_VERSION + API_ID_LOG, + "/assets/recipe/{genome}/{asset}", + operation_id=API_VERSION + API_ID_RECIPE, tags=api_version_tags, ) -async def download_asset_build_log( +async def download_asset_build_recipe( genome: str = g, asset: str = a, tag: Optional[str] = tq ): """ - Returns a build log. Requires the genome name and the asset name as an input. + Returns a build recipe. Requires the genome name and the asset name as an input. Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. @@ -368,7 +370,7 @@ async def download_asset_build_log( tag = tag or rgc.get_default_tag( genome, asset ) # returns 'default' for nonexistent genome/asset; no need to catch - file_name = TEMPLATE_LOG.format(asset, tag) + file_name = TEMPLATE_RECIPE_JSON.format(asset, tag) path, remote = get_datapath_for_genome( rgc, dict(genome=genome, file_name=file_name), remote_key="http" ) @@ -377,9 +379,9 @@ async def download_asset_build_log( return RedirectResponse(path) _LOGGER.info(f"serving build log file: '{path}'") if os.path.isfile(path): - return FileResponse( - path, filename=file_name, media_type="application/octet-stream" - ) + with open(path, "r") as f: + recipe = load(f) + return JSONResponse(recipe) else: msg = MSG_404.format(f"asset ({asset})") _LOGGER.warning(msg) @@ -387,40 +389,26 @@ async def download_asset_build_log( @router.get( - "/assets/recipe/{genome}/{asset}", - operation_id=API_VERSION + API_ID_RECIPE, + "/assets/log/{genome}/{asset}", + operation_id=API_VERSION + API_ID_LOG, tags=api_version_tags, ) -async def download_asset_build_recipe( +async def download_asset_build_log( genome: str = g, asset: str = a, tag: Optional[str] = tq ): """ - Returns a build recipe. Requires the genome name and the asset name as an input. + Returns a build log. Requires the genome name and the asset name as an input. Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. """ - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch - file_name = TEMPLATE_RECIPE_JSON.format(asset, tag) - path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name), remote_key="http" + return serve_file_for_asset( + rgc=rgc, + genome=genome, + asset=asset, + tag=tag, + template=TEMPLATE_LOG, ) - if remote: - _LOGGER.info(f"redirecting to URL: '{path}'") - return RedirectResponse(path) - _LOGGER.info(f"serving build log file: '{path}'") - if os.path.isfile(path): - import json - - with open(path, "r") as f: - recipe = json.load(f) - return JSONResponse(recipe) - else: - msg = MSG_404.format(f"asset ({asset})") - _LOGGER.warning(msg) - raise HTTPException(status_code=404, detail=msg) @router.get( @@ -438,26 +426,13 @@ async def download_asset_directory_contents( Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. """ - # TODO: DRY - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch - file_name = TEMPLATE_ASSET_DIR_CONTENTS.format(asset, tag) - path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name), remote_key="http" + return serve_file_for_asset( + rgc=rgc, + genome=genome, + asset=asset, + tag=tag, + template=TEMPLATE_ASSET_DIR_CONTENTS, ) - if remote: - _LOGGER.info(f"redirecting to URL: '{path}'") - return RedirectResponse(path) - _LOGGER.info(f"serving asset dir tree file: '{path}'") - if os.path.isfile(path): - return FileResponse( - path, filename=file_name, media_type="application/octet-stream" - ) - else: - msg = MSG_404.format(f"asset ({asset})") - _LOGGER.warning(msg) - raise HTTPException(status_code=404, detail=msg) @router.get( From 443365a279738528e91fce2a99c1d8d6e87c6a5d Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 26 Mar 2021 14:35:03 -0400 Subject: [PATCH 28/44] link seek key direct paths; #113 --- refgenieserver/helpers.py | 1 - refgenieserver/templates/v3/asset.html | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index aa63bdb..ec662ee 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -190,7 +190,6 @@ def is_data_remote(rgc): True if "remotes" in rgc and isinstance(rgc["remotes"], dict) - and "http" in rgc["remotes"] and all( [ "prefix" in r and isinstance(r["prefix"], str) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 30cfa9f..2b49967 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -31,7 +31,7 @@

Attributes

  • {{ attr_name }}:
    • {% for seek_key, path in value.items() %} -
    • {{ seek_key }}: {{ path }}
    • +
    • {{ seek_key }}: {{ path }}
    • {% endfor %}
    {% elif value is iterable and value is not string %} @@ -72,4 +72,4 @@

    API endpoints

    -{% endblock %} \ No newline at end of file +{% endblock %} From db87cb180d244426b98549287383d8c6c8fc38b9 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 26 Mar 2021 15:09:22 -0400 Subject: [PATCH 29/44] add links to HTTP and S3 file path serving; #113 --- refgenieserver/templates/v3/asset.html | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 2b49967..facd873 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -31,7 +31,14 @@

    Attributes

  • {{ attr_name }}:
    • {% for seek_key, path in value.items() %} -
    • {{ seek_key }}: {{ path }}
    • +
    • + {{ seek_key }}: {{ path }} + {% if "remotes" in rgc %} + {% for remote_key, remote_dict in rgc["remotes"].items() %} + {{ remote_key }} path + {% endfor %} + {% endif %} +
    • {% endfor %}
    {% elif value is iterable and value is not string %} From 8cc7bf44d1d4c530bd684a47e88110865fd0a790 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Fri, 26 Mar 2021 15:53:05 -0400 Subject: [PATCH 30/44] update seek_keys style --- refgenieserver/templates/v3/asset.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index facd873..42083bc 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -32,12 +32,12 @@

    Attributes

      {% for seek_key, path in value.items() %}
    • - {{ seek_key }}: {{ path }} {% if "remotes" in rgc %} {% for remote_key, remote_dict in rgc["remotes"].items() %} - {{ remote_key }} path + {{ remote_key|upper }} {{ "|" if not loop.last }} {% endfor %} {% endif %} + {{ seek_key }}: {{ path }}
    • {% endfor %}
    @@ -58,7 +58,7 @@

    Attributes

    {% set parent_genome = parsed["namespace"] %} {% set parent_alias = rgc.get_genome_alias(digest=parsed["namespace"]) %} {% endif %} -
  • {{ parent_alias }}/{{ parsed['item'] }}:{{ parsed['tag'] }}{{ "," if not loop.last }}
  • +
  • {{ parent_alias }}/{{ parsed['item'] }}:{{ parsed['tag'] }}
  • {% endfor %} From 8d0656d1dc1b661efdd98c702247197463b78f94 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 29 Mar 2021 12:08:56 -0400 Subject: [PATCH 31/44] don't append seek_key target for dir seek_key --- refgenieserver/helpers.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index ec662ee..ed892be 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -267,8 +267,12 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) seek_key_target = tag_dict[CFG_SEEK_KEYS_KEY][seek_key] - file_name = f"{asset}__{tag}/{seek_key_target}" - path, remote = get_datapath_for_genome( + # append the seek_key value to the path only if it isn't the "dir" seek_key. + # Otherwise the result would be a path ending with "\." + file_name = ( + f"{asset}__{tag}/{seek_key_target}" if seek_key != "dir" else f"{asset}__{tag}/" + ) + path, _ = get_datapath_for_genome( rgc, dict(genome=genome, file_name=file_name), remote_key=remote_key ) _LOGGER.info(f"serving asset file path: {path}") From 5d06269cc6e1d469d1dd7acabc241c927d6815ee Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Mar 2021 17:19:49 -0400 Subject: [PATCH 32/44] implement asset dir content listing; #113 --- refgenieserver/helpers.py | 25 ++++++++++++++++++++++++- refgenieserver/routers/version3.py | 11 +++++++++++ refgenieserver/templates/v3/asset.html | 12 ++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index ed892be..3139013 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -4,7 +4,8 @@ from fastapi import HTTPException from fastapi.responses import FileResponse, RedirectResponse from refgenconf.exceptions import RefgenconfError -from ubiquerg import VersionInHelpParser +from refgenconf.helpers import send_data_request +from ubiquerg import VersionInHelpParser, is_url from yacman import get_first_env_var from ._version import __version__ as v @@ -308,3 +309,25 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): msg = MSG_404.format(f"asset ({genome}/{asset}:{tag})") _LOGGER.warning(msg) raise HTTPException(status_code=404, detail=msg) + + +def get_asset_dir_contents(rgc, genome, asset, tag): + """ + Get the asset directory contents into a list + """ + tag = tag or rgc.get_default_tag( + genome, asset + ) # returns 'default' for nonexistent genome/asset; no need to catch + file_name = TEMPLATE_ASSET_DIR_CONTENTS.format(asset, tag) + path, remote = get_datapath_for_genome( + rgc, dict(genome=genome, file_name=file_name), remote_key="http" + ) + if is_url(path): + _LOGGER.debug(f"asset dir contents filepath is a url: {path}.") + lines = send_data_request(url=path).split() + else: + _LOGGER.debug(f"asset dir contents filepath: {path}") + with open(path) as f: + lines = f.readlines() + _LOGGER.debug(f"asset dir contents: {lines}") + return [l.strip() for l in lines] diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index fb45ecc..b43519a 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -16,6 +16,7 @@ from ..data_models import Dict, Genome, List, Tag from ..helpers import ( create_asset_file_path, + get_asset_dir_contents, get_datapath_for_genome, get_openapi_version, is_data_remote, @@ -158,6 +159,15 @@ async def asset_splash_page( for oid, path in map_paths_by_id(app.openapi()).items() if oid in OPERATION_IDS["v3_asset"].keys() } + try: + asset_dir_contents = get_asset_dir_contents( + rgc=rgc, genome=genome, asset=asset, tag=tag + ) + except Exception as e: + _LOGGER.warning( + f"Could not determine asset directory contents. Caught error: {str(e)}" + ) + asset_dir_contents = None templ_vars = { "request": request, "genome": genome, @@ -168,6 +178,7 @@ async def asset_splash_page( "links_dict": links_dict, "current_year": current_year, "openapi_version": get_openapi_version(app), + "asset_dir_contents": asset_dir_contents, } _LOGGER.debug(f"merged vars: {dict(templ_vars, **ALL_VERSIONS)}") return templates.TemplateResponse("v3/asset.html", dict(templ_vars, **ALL_VERSIONS)) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 42083bc..45c5b86 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -78,5 +78,17 @@

    API endpoints

    + {% if asset_dir_contents is not none %} +
    +
    +

    Asset directory contents

    +
      + {% for file in asset_dir_contents %} +
    • {{ file }}
    • + {% endfor %} +
    +
    +
    + {% endif %} {% endblock %} From 7e4aa9964fa37af9edacc3b93a5db5a18ced40c0 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 30 Mar 2021 17:26:38 -0400 Subject: [PATCH 33/44] check if local path exists --- refgenieserver/helpers.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 3139013..7163f5f 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -323,11 +323,13 @@ def get_asset_dir_contents(rgc, genome, asset, tag): rgc, dict(genome=genome, file_name=file_name), remote_key="http" ) if is_url(path): - _LOGGER.debug(f"asset dir contents filepath is a url: {path}.") + _LOGGER.debug(f"Asset dir contents path is a URL: {path}") lines = send_data_request(url=path).split() - else: - _LOGGER.debug(f"asset dir contents filepath: {path}") + elif os.path.exists(path): + _LOGGER.debug(f"Asset dir contents path is a file: {path}") with open(path) as f: lines = f.readlines() - _LOGGER.debug(f"asset dir contents: {lines}") + else: + raise TypeError(f"Path is neither a valid URL nor an existing file: {path}") + _LOGGER.debug(f"Asset dir contents: {lines}") return [l.strip() for l in lines] From 3c44dd5f3a6af9e5243c40c61417f9bc2bf6bdfc Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 31 Mar 2021 13:46:49 -0400 Subject: [PATCH 34/44] link asset files in asset dir contents list --- refgenieserver/helpers.py | 9 ++++++++- refgenieserver/main.py | 1 + refgenieserver/routers/version3.py | 12 ++++++++++++ refgenieserver/templates/v3/asset.html | 8 +++++++- 4 files changed, 28 insertions(+), 2 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 7163f5f..a3378ba 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -314,6 +314,12 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): def get_asset_dir_contents(rgc, genome, asset, tag): """ Get the asset directory contents into a list + + :param refgenconf.RefGenConf rgc: config + :param str genome: genome name + :param str asset: asset name + :param str tag: tag name + :return list[str]: list of files in the asset directory """ tag = tag or rgc.get_default_tag( genome, asset @@ -332,4 +338,5 @@ def get_asset_dir_contents(rgc, genome, asset, tag): else: raise TypeError(f"Path is neither a valid URL nor an existing file: {path}") _LOGGER.debug(f"Asset dir contents: {lines}") - return [l.strip() for l in lines] + # need to strip ./ from the left of each line and a newline char from the right + return [line.strip().lstrip("./") for line in lines] diff --git a/refgenieserver/main.py b/refgenieserver/main.py index ab9608d..2cde651 100644 --- a/refgenieserver/main.py +++ b/refgenieserver/main.py @@ -21,6 +21,7 @@ app.mount("/" + STATIC_DIRNAME, StaticFiles(directory=STATIC_PATH), name=STATIC_DIRNAME) templates = Jinja2Templates(directory=TEMPLATES_PATH) +templates.env.filters["os_path_join"] = lambda paths: os.path.join(*paths) def main(): diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index b43519a..51e34d1 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -168,6 +168,16 @@ async def asset_splash_page( f"Could not determine asset directory contents. Caught error: {str(e)}" ) asset_dir_contents = None + + try: + asset_dir_path = create_asset_file_path( + rgc, genome, asset, tag, "dir", remote_key="http" + ) + except Exception as e: + _LOGGER.warning( + f"Could not determine asset directory path. Caught error: {str(e)}" + ) + asset_dir_path = None templ_vars = { "request": request, "genome": genome, @@ -179,6 +189,8 @@ async def asset_splash_page( "current_year": current_year, "openapi_version": get_openapi_version(app), "asset_dir_contents": asset_dir_contents, + "asset_dir_path": asset_dir_path, + "is_data_remote": is_data_remote(rgc), } _LOGGER.debug(f"merged vars: {dict(templ_vars, **ALL_VERSIONS)}") return templates.TemplateResponse("v3/asset.html", dict(templ_vars, **ALL_VERSIONS)) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 45c5b86..380eda7 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -84,7 +84,13 @@

    API endpoints

    Asset directory contents

      {% for file in asset_dir_contents %} -
    • {{ file }}
    • +
    • + {% if is_data_remote %} + {{ file }} + {% else %} + {{ file }} + {% endif %} +
    • {% endfor %}
    From 180fbb05428e0901192d2fc69d1715389de52b6b Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 31 Mar 2021 14:56:06 -0400 Subject: [PATCH 35/44] add link to all remotes for asset dir conents; #113 --- refgenieserver/routers/version3.py | 25 +++++++++++++++---------- refgenieserver/templates/v3/asset.html | 13 ++++++++----- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 51e34d1..241b5be 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -169,15 +169,20 @@ async def asset_splash_page( ) asset_dir_contents = None - try: - asset_dir_path = create_asset_file_path( - rgc, genome, asset, tag, "dir", remote_key="http" - ) - except Exception as e: - _LOGGER.warning( - f"Could not determine asset directory path. Caught error: {str(e)}" - ) - asset_dir_path = None + asset_dir_paths = {} + if is_data_remote(rgc): + for remote_key in rgc["remotes"].keys(): + try: + asset_dir_path = create_asset_file_path( + rgc, genome, asset, tag, "dir", remote_key=remote_key + ) + except Exception as e: + _LOGGER.warning( + f"Could not determine asset directory path. Caught error: {str(e)}" + ) + asset_dir_path = None + asset_dir_paths[remote_key] = asset_dir_path + templ_vars = { "request": request, "genome": genome, @@ -189,7 +194,7 @@ async def asset_splash_page( "current_year": current_year, "openapi_version": get_openapi_version(app), "asset_dir_contents": asset_dir_contents, - "asset_dir_path": asset_dir_path, + "asset_dir_paths": asset_dir_paths, "is_data_remote": is_data_remote(rgc), } _LOGGER.debug(f"merged vars: {dict(templ_vars, **ALL_VERSIONS)}") diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 380eda7..9c900d0 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -23,7 +23,7 @@

    {{ alias }}/{{ asset }}:{{ tag
    -
    +

    Attributes

      {% for attr_name, value in tag_data.items() %} @@ -32,7 +32,7 @@

      Attributes

        {% for seek_key, path in value.items() %}
      • - {% if "remotes" in rgc %} + {% if is_data_remote %} {% for remote_key, remote_dict in rgc["remotes"].items() %} {{ remote_key|upper }} {{ "|" if not loop.last }} {% endfor %} @@ -69,7 +69,7 @@

        Attributes

        {% endfor %}
    -
    +

    API endpoints

      {% for id, path in links_dict.items() %} @@ -80,13 +80,16 @@

      API endpoints

    {% if asset_dir_contents is not none %}
    -
    +

    Asset directory contents

      {% for file in asset_dir_contents %}
    • {% if is_data_remote %} - {{ file }} + {% for remote_key, asset_dir_path in asset_dir_paths.items() %} + {{ remote_key|upper }}{{ ":" if loop.last else " |" }} + {% endfor %} + {{ file }} {% else %} {{ file }} {% endif %} From b019940e17ce6a5c35dd51b7e34a53cde546e550 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Thu, 1 Apr 2021 08:41:48 -0400 Subject: [PATCH 36/44] list sorted files --- refgenieserver/templates/v3/asset.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 9c900d0..087c89b 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -83,7 +83,7 @@

      API endpoints

      Asset directory contents

        - {% for file in asset_dir_contents %} + {% for file in asset_dir_contents|sort %}
      • {% if is_data_remote %} {% for remote_key, asset_dir_path in asset_dir_paths.items() %} From e5e2d4752a936120a992d581814c4b4fa5cb58b2 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Mon, 5 Apr 2021 15:31:24 -0400 Subject: [PATCH 37/44] use os.listdir and JSON for dir contents storage; #110 --- refgenieserver/helpers.py | 34 ++++++++++++++++++++++++------ refgenieserver/routers/version3.py | 28 ++++++++---------------- refgenieserver/server_builder.py | 6 +++++- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index a3378ba..18da2b5 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -1,8 +1,9 @@ import logging +from json import load from string import Formatter from fastapi import HTTPException -from fastapi.responses import FileResponse, RedirectResponse +from fastapi.responses import FileResponse, JSONResponse, RedirectResponse from refgenconf.exceptions import RefgenconfError from refgenconf.helpers import send_data_request from ubiquerg import VersionInHelpParser, is_url @@ -311,6 +312,28 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): raise HTTPException(status_code=404, detail=msg) +def serve_json_for_asset(rgc, genome, asset, tag, template): + tag = tag or rgc.get_default_tag( + genome, asset + ) # returns 'default' for nonexistent genome/asset; no need to catch + file_name = template.format(asset, tag) + path, remote = get_datapath_for_genome( + rgc, dict(genome=genome, file_name=file_name), remote_key="http" + ) + if remote: + _LOGGER.info(f"redirecting to URL: '{path}'") + return RedirectResponse(path) + _LOGGER.info(f"serving recipe: '{path}'") + if os.path.isfile(path): + with open(path, "r") as f: + recipe = load(f) + return JSONResponse(recipe) + else: + msg = MSG_404.format(f"asset ({asset})") + _LOGGER.warning(msg) + raise HTTPException(status_code=404, detail=msg) + + def get_asset_dir_contents(rgc, genome, asset, tag): """ Get the asset directory contents into a list @@ -330,13 +353,12 @@ def get_asset_dir_contents(rgc, genome, asset, tag): ) if is_url(path): _LOGGER.debug(f"Asset dir contents path is a URL: {path}") - lines = send_data_request(url=path).split() + dir_contents = send_data_request(url=path) elif os.path.exists(path): _LOGGER.debug(f"Asset dir contents path is a file: {path}") with open(path) as f: - lines = f.readlines() + dir_contents = load(f) else: raise TypeError(f"Path is neither a valid URL nor an existing file: {path}") - _LOGGER.debug(f"Asset dir contents: {lines}") - # need to strip ./ from the left of each line and a newline char from the right - return [line.strip().lstrip("./") for line in lines] + _LOGGER.debug(f"Asset dir contents: {dir_contents}") + return dir_contents diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 241b5be..6d23915 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -22,6 +22,7 @@ is_data_remote, safely_get_example, serve_file_for_asset, + serve_json_for_asset, ) from ..main import _LOGGER, app, rgc, templates @@ -159,6 +160,7 @@ async def asset_splash_page( for oid, path in map_paths_by_id(app.openapi()).items() if oid in OPERATION_IDS["v3_asset"].keys() } + try: asset_dir_contents = get_asset_dir_contents( rgc=rgc, genome=genome, asset=asset, tag=tag @@ -395,25 +397,13 @@ async def download_asset_build_recipe( Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. """ - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch - file_name = TEMPLATE_RECIPE_JSON.format(asset, tag) - path, remote = get_datapath_for_genome( - rgc, dict(genome=genome, file_name=file_name), remote_key="http" + return serve_json_for_asset( + rgc=rgc, + genome=genome, + asset=asset, + tag=tag, + template=TEMPLATE_RECIPE_JSON, ) - if remote: - _LOGGER.info(f"redirecting to URL: '{path}'") - return RedirectResponse(path) - _LOGGER.info(f"serving build log file: '{path}'") - if os.path.isfile(path): - with open(path, "r") as f: - recipe = load(f) - return JSONResponse(recipe) - else: - msg = MSG_404.format(f"asset ({asset})") - _LOGGER.warning(msg) - raise HTTPException(status_code=404, detail=msg) @router.get( @@ -454,7 +444,7 @@ async def download_asset_directory_contents( Optionally, 'tag' query parameter can be specified to get a tagged asset archive. Default tag is returned otherwise. """ - return serve_file_for_asset( + return serve_json_for_asset( rgc=rgc, genome=genome, asset=asset, diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index a1405dc..3b296ba 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -1,6 +1,7 @@ import logging import sys from glob import glob +from json import dump from subprocess import run from attmap import PathExAttMap as PXAM @@ -415,7 +416,10 @@ def _get_asset_dir_contents(asset_dir, asset_name, tag_name): os.path.dirname(asset_dir), TEMPLATE_ASSET_DIR_CONTENTS.format(asset_name, tag_name), ) - run(f"cd {asset_dir}; find . -type f > {asset_dir_contents_file_path}", shell=True) + files = os.listdir(asset_dir) + _LOGGER.debug(f"dir contents: {files}") + with open(asset_dir_contents_file_path, "w") as outfile: + dump(files, outfile) _LOGGER.info( f"Asset directory contents file created: {asset_dir_contents_file_path}" ) From 9c601e9c0f8fc44e8895938d0dd455d90b9e4bf0 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 6 Apr 2021 09:06:04 -0400 Subject: [PATCH 38/44] docs, cleanup --- refgenieserver/helpers.py | 44 ++++++++++++++++++------------ refgenieserver/routers/version3.py | 6 ++-- refgenieserver/server_builder.py | 21 ++++++++------ 3 files changed, 41 insertions(+), 30 deletions(-) diff --git a/refgenieserver/helpers.py b/refgenieserver/helpers.py index 18da2b5..08e7b7e 100644 --- a/refgenieserver/helpers.py +++ b/refgenieserver/helpers.py @@ -57,11 +57,11 @@ def add_subparser(cmd, description): "--config", required=False, dest="config", - help="A path to the refgenie config file (YAML). If not provided, the first available environment variable " - "among: '{}' will be used if set. Currently: {}".format( - ", ".join(CFG_ENV_VARS), env_var_val - ), - ) + help=f"A path to the refgenie config file (YAML). If not provided, the " + f"first available environment variable among: " + f"'{', '.join(CFG_ENV_VARS)}' will be used if set. " + f"Currently: {env_var_val}", + ), sps[cmd].add_argument( "-d", "--dbg", @@ -83,7 +83,8 @@ def add_subparser(cmd, description): dest="genomes_desc", type=str, default=None, - help="Path to a CSV file with genomes descriptions. Format: genome_name, genome description", + help="Path to a CSV file with genomes descriptions. " + "Format: genome_name, genome description", ) sps["archive"].add_argument( "-f", @@ -204,7 +205,8 @@ def is_data_remote(rgc): def purge_nonservable(rgc): """ - Remove entries in RefGenConf object that were not processed by the archiver and should not be served + Remove entries in RefGenConf object that were not processed by the archiver + and should not be served :param refgenconf.RefGenConf rgc: object to check :return refgenconf.RefGenConf: object with just the servable entries @@ -283,7 +285,7 @@ def create_asset_file_path(rgc, genome, asset, tag, seek_key, remote_key="http") def serve_file_for_asset(rgc, genome, asset, tag, template): """ - Serve a file, like log or asset dir contents for an asset + Serve a file, like log file :param str genome: genome name :param str asset: asset name @@ -291,9 +293,8 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): :param ste template: file name template with place for asset and tag names, e.g. 'build_log_{}__{}.md' """ - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch + # returns 'default' for nonexistent genome/asset; no need to catch + tag = tag or rgc.get_default_tag(genome, asset) file_name = template.format(asset, tag) path, remote = get_datapath_for_genome( rgc, dict(genome=genome, file_name=file_name), remote_key="http" @@ -313,9 +314,17 @@ def serve_file_for_asset(rgc, genome, asset, tag, template): def serve_json_for_asset(rgc, genome, asset, tag, template): - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch + """ + Serve a JSON object, like recipe or asset dir contents for an asset + + :param str genome: genome name + :param str asset: asset name + :param str tag: tag name + :param ste template: file name template with place for asset and tag names, + e.g. 'build_recipe_{}__{}.json' + """ + # returns 'default' for nonexistent genome/asset; no need to catch + tag = tag or rgc.get_default_tag(genome, asset) file_name = template.format(asset, tag) path, remote = get_datapath_for_genome( rgc, dict(genome=genome, file_name=file_name), remote_key="http" @@ -323,7 +332,7 @@ def serve_json_for_asset(rgc, genome, asset, tag, template): if remote: _LOGGER.info(f"redirecting to URL: '{path}'") return RedirectResponse(path) - _LOGGER.info(f"serving recipe: '{path}'") + _LOGGER.info(f"serving JSON: '{path}'") if os.path.isfile(path): with open(path, "r") as f: recipe = load(f) @@ -344,9 +353,8 @@ def get_asset_dir_contents(rgc, genome, asset, tag): :param str tag: tag name :return list[str]: list of files in the asset directory """ - tag = tag or rgc.get_default_tag( - genome, asset - ) # returns 'default' for nonexistent genome/asset; no need to catch + # returns 'default' for nonexistent genome/asset; no need to catch + tag = tag or rgc.get_default_tag(genome, asset) file_name = TEMPLATE_ASSET_DIR_CONTENTS.format(asset, tag) path, remote = get_datapath_for_genome( rgc, dict(genome=genome, file_name=file_name), remote_key="http" diff --git a/refgenieserver/routers/version3.py b/refgenieserver/routers/version3.py index 6d23915..0a9568d 100644 --- a/refgenieserver/routers/version3.py +++ b/refgenieserver/routers/version3.py @@ -1,19 +1,17 @@ from copy import copy from datetime import date from enum import Enum -from json import load from typing import Optional from fastapi import APIRouter, HTTPException, Path, Query, Response -from refgenconf.exceptions import RefgenconfError from refgenconf.refgenconf import map_paths_by_id from starlette.requests import Request -from starlette.responses import FileResponse, JSONResponse, RedirectResponse +from starlette.responses import FileResponse, RedirectResponse from ubiquerg import parse_registry_path from yacman import IK, UndefinedAliasError from ..const import * -from ..data_models import Dict, Genome, List, Tag +from ..data_models import Dict, List, Tag from ..helpers import ( create_asset_file_path, get_asset_dir_contents, diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 3b296ba..320f57a 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -54,7 +54,8 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): ) except KeyError: raise GenomeConfigFormatError( - f"The config '{cfg_path}' is missing a {' or '.join([CFG_ARCHIVE_KEY, CFG_ARCHIVE_KEY_OLD])} entry. Can't determine the desired archive." + f"The config '{cfg_path}' is missing a {' or '.join([CFG_ARCHIVE_KEY, CFG_ARCHIVE_KEY_OLD])} entry. " + f"Can't determine the desired archive." ) if os.path.isfile(server_rgc_path) and not os.access(server_rgc_path, os.W_OK): raise OSError( @@ -73,7 +74,8 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): if remove: if not registry_paths: _LOGGER.error( - "To remove archives you have to specify them. Use 'asset_registry_path' argument." + "To remove archives you have to specify them. " + "Use 'asset_registry_path' argument." ) exit(1) with rgc_server as r: @@ -175,10 +177,10 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): for tag_name in tags if isinstance(tags, list) else [tags]: if not rgc.is_asset_complete(genome, asset_name, tag_name): raise MissingConfigDataError( - f"Asset '{genome}/{asset_name}:{tag_name}' is incomplete. This probably means an" - f" attempt to archive a partially pulled parent. " - f"refgenieserver archive requires all assets to be built" - f" prior to archiving." + f"Asset '{genome}/{asset_name}:{tag_name}' is incomplete. " + f"This probably means an attempt to archive a partially " + f"pulled parent. refgenieserver archive requires all assets to " + f"be built prior to archiving." ) file_name = rgc[CFG_GENOMES_KEY][genome][CFG_ASSETS_KEY][asset_name][ CFG_ASSET_TAGS_KEY @@ -225,7 +227,7 @@ def archive(rgc, registry_paths, force, remove, cfg_path, genomes_desc): continue else: _LOGGER.info( - f"Updating '{genome}/{asset_name}:{tag_name}' tag attributes..." + f"Updating '{genome}/{asset_name}:{tag_name}' tag attributes" ) tag_attrs = { CFG_ASSET_PATH_KEY: file_name, @@ -434,7 +436,10 @@ def _copy_recipe(input_dir, target_dir, asset_name, tag_name): :param str asset_name: asset name :param str tag_name: tag name """ - recipe_path = f"{input_dir}/{BUILD_STATS_DIR}/{TEMPLATE_RECIPE_JSON.format(asset_name, tag_name)}" + recipe_path = ( + f"{input_dir}/{BUILD_STATS_DIR}/" + f"{TEMPLATE_RECIPE_JSON.format(asset_name, tag_name)}" + ) if recipe_path and os.path.exists(recipe_path): run("cp " + recipe_path + " " + target_dir, shell=True) _LOGGER.debug(f"Recipe copied to: {target_dir}") From a88b0ec48d60da92e2f9e247ebff75f702a51b13 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 6 Apr 2021 09:30:02 -0400 Subject: [PATCH 39/44] add dir_contents API test --- .github/workflows/black.yml | 2 +- refgenieserver.postman_collection.json | 51 +++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml index f58e4c6..63e1851 100644 --- a/.github/workflows/black.yml +++ b/.github/workflows/black.yml @@ -8,4 +8,4 @@ jobs: steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - - uses: psf/black@stable + - uses: psf/black@20.8b1 diff --git a/refgenieserver.postman_collection.json b/refgenieserver.postman_collection.json index d2d0c0e..1e8da57 100644 --- a/refgenieserver.postman_collection.json +++ b/refgenieserver.postman_collection.json @@ -656,6 +656,55 @@ }, "response": [] }, + { + "name": "get_dir_contents", + "event": [ + { + "listen": "test", + "script": { + "exec": [ + "pm.test(\"Status code is 200\", function () {", + " pm.response.to.have.status(200);", + "});", + "pm.test(\"Content-Type is present\", function () {", + " pm.response.to.have.header(\"Content-Type\");", + "});", + "pm.test(\"JSON is an array of strings\", function () {", + " var jsonData = pm.response.json();", + " pm.expect(jsonData).to.be.an(\"array\");", + " jsonData.forEach(item => pm.expect(item).to.be.a(\"string\"));", + "});", + "" + ], + "type": "text/javascript" + } + } + ], + "request": { + "method": "GET", + "header": [], + "url": { + "raw": "http://{{urlBase}}/assets/dir_contents/{{genomeDigest}}/{{asset}}?tag={{defaultTag}}", + "protocol": "http", + "host": [ + "{{urlBase}}" + ], + "path": [ + "assets", + "dir_contents", + "{{genomeDigest}}", + "{{asset}}" + ], + "query": [ + { + "key": "tag", + "value": "{{defaultTag}}" + } + ] + } + }, + "response": [] + }, { "name": "get_alias_dict", "event": [ @@ -805,4 +854,4 @@ "response": [] } ] -} \ No newline at end of file +} From 43d7c459f76404fb12b1d40e850f0f43547e0105 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 6 Apr 2021 10:22:29 -0400 Subject: [PATCH 40/44] update changelog --- changelog.md | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/changelog.md b/changelog.md index 3d4a548..9e4a920 100644 --- a/changelog.md +++ b/changelog.md @@ -1,13 +1,22 @@ # Changelog -This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. - +This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. ## [0.7.0] -- unreleased +### Added +- `remotes` section in the refgenieserver config, which supersedes `remote_url_base`. It can be used to define multiple remote data providers. +- new endpoints: + - `/remotes/dict` to get `remotes` dictionary + - `/assets/dir_contents/{genome}/{asset}` to get a JSON object (array of strings) listing all files in the asset directory + - `/assets/file_path/{genome}/{asset}/{seek_key}` to get a *remote* path to the specified file +- direct *unarchived* asset file downloads from asset splash page, both for seek_keys and other asset files + +### Changed +- endpoints that return digests return plain text instead of JSON objects for easier parsing; [#67](https://github.com/refgenie/refgenieserver/issues/67) ## [0.6.1] -- 2021-03-18 ### Added -- private endpoint serving genomes dict to openAPI schema; [#105](https://github.com/refgenie/refgenieserver/issues/105) +- private endpoint serving genomes dict to openAPI schema; [#105](https://github.com/refgenie/refgenieserver/issues/105) ## [0.6.0] -- 2021-03-11 ### Added @@ -34,7 +43,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - path specified in `genome_archive_config` is considered relative to the refgenie genome config file, unless absolute. - non-servable assets purging is now performed prior to serving rather than after each archive job completion -- dropped Python 2 support +- dropped Python 2 support ### Removed - support for old `genome_archive` key; use `genome_archive_folder` and `genome_archive_config` from now on. @@ -46,7 +55,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [0.4.3] -- 2020-01-16 ### Added - a possibility to decouple genome archive directory and genome archive config file. `refgenieserver archive` uses new key (`genome_archive_config`) from `refgenconf` -- a genome archive config file writability check +- a genome archive config file writability check ### Changed - key `genome_archive` to `genome_archive_folder`. Backwards compatiblity is preserved (both are currently supported) @@ -57,10 +66,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [0.4.1] -- 2019-12-13 ### Fixed -- relationship info not being updated during specific asset archivization; [#70](https://github.com/refgenie/refgenieserver/issues/70) +- relationship info not being updated during specific asset archivization; [#70](https://github.com/refgenie/refgenieserver/issues/70) ### Changed -- order of the assets adn tags in the table in the index page: sorted alphabetically instead of oldest to newest +- order of the assets adn tags in the table in the index page: sorted alphabetically instead of oldest to newest ## [0.4.0] -- 2019-12-06 ### Added @@ -115,7 +124,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - config manipulation support in multi-process contexts, it's racefree, uses file locks - archive removal support (added `-r` option in `refgenieserver archive`) - asset registry path support in `refgenieserver archive` - + ### Changed - command order from `refgenieserver -c CONFIG -d archive/serve` to `refgenieserver archive/serve -c CONFIG -d` - the genome tarballs are not produced From 57d5447476041d6ee22a7bfed1da2c3895b01cd5 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Wed, 7 Apr 2021 13:09:30 -0400 Subject: [PATCH 41/44] store contents of the directories in asset direcotry too; #114 --- refgenieserver/server_builder.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/refgenieserver/server_builder.py b/refgenieserver/server_builder.py index 320f57a..d3c92ea 100644 --- a/refgenieserver/server_builder.py +++ b/refgenieserver/server_builder.py @@ -418,7 +418,12 @@ def _get_asset_dir_contents(asset_dir, asset_name, tag_name): os.path.dirname(asset_dir), TEMPLATE_ASSET_DIR_CONTENTS.format(asset_name, tag_name), ) - files = os.listdir(asset_dir) + files = [ + os.path.relpath(os.path.join(dp, f), asset_dir) + for dp, dn, fn in os.walk(asset_dir) + for f in fn + if BUILD_STATS_DIR not in dp + ] _LOGGER.debug(f"dir contents: {files}") with open(asset_dir_contents_file_path, "w") as outfile: dump(files, outfile) From 3f63e7d3f3e055e4e85bc9463bfc75697f915e12 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 27 Apr 2021 08:50:27 -0400 Subject: [PATCH 42/44] dont force remotes to uppercase --- refgenieserver/templates/v3/asset.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/refgenieserver/templates/v3/asset.html b/refgenieserver/templates/v3/asset.html index 087c89b..4857628 100644 --- a/refgenieserver/templates/v3/asset.html +++ b/refgenieserver/templates/v3/asset.html @@ -34,7 +34,7 @@

        Attributes

      • {% if is_data_remote %} {% for remote_key, remote_dict in rgc["remotes"].items() %} - {{ remote_key|upper }} {{ "|" if not loop.last }} + {{ remote_key }} {{ "|" if not loop.last }} {% endfor %} {% endif %} {{ seek_key }}: {{ path }} @@ -87,7 +87,7 @@

        Asset directory contents

      • {% if is_data_remote %} {% for remote_key, asset_dir_path in asset_dir_paths.items() %} - {{ remote_key|upper }}{{ ":" if loop.last else " |" }} + {{ remote_key }}{{ ":" if loop.last else " |" }} {% endfor %} {{ file }} {% else %} From 5edcaaeb4e2287f6470c26ce81b2f0a45a3facb8 Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 27 Apr 2021 12:57:02 -0400 Subject: [PATCH 43/44] use released refgenconf --- requirements/requirements-all.txt | 4 ++-- staging.Dockerfile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 1f2ba28..e353ac6 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -2,6 +2,6 @@ aiofiles fastapi jinja2 logmuse>=0.2 +refgenconf>=0.11.0 +ubiquerg>=0.6.1 uvicorn>=0.7.1 -# refgenconf>=0.11.0 -ubiquerg>=0.6.1 \ No newline at end of file diff --git a/staging.Dockerfile b/staging.Dockerfile index 7ab68ee..f74376b 100644 --- a/staging.Dockerfile +++ b/staging.Dockerfile @@ -2,5 +2,5 @@ FROM tiangolo/uvicorn-gunicorn:python3.7-alpine3.8 LABEL authors="Nathan Sheffield, Michal Stolarczyk" COPY . /app -RUN pip install https://github.com/refgenie/refgenconf/archive/dev.zip +#RUN pip install https://github.com/refgenie/refgenconf/archive/dev.zip RUN pip install . From ddf62023e4d90d4ccb9fd73133765ca6cb1472fa Mon Sep 17 00:00:00 2001 From: Michal Stolarczyk Date: Tue, 27 Apr 2021 13:01:00 -0400 Subject: [PATCH 44/44] update version and release date --- changelog.md | 2 +- refgenieserver/_version.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/changelog.md b/changelog.md index 9e4a920..9602475 100644 --- a/changelog.md +++ b/changelog.md @@ -2,7 +2,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) format. -## [0.7.0] -- unreleased +## [0.7.0] -- 2021-04-27 ### Added - `remotes` section in the refgenieserver config, which supersedes `remote_url_base`. It can be used to define multiple remote data providers. - new endpoints: diff --git a/refgenieserver/_version.py b/refgenieserver/_version.py index 5a93ff5..49e0fc1 100644 --- a/refgenieserver/_version.py +++ b/refgenieserver/_version.py @@ -1 +1 @@ -__version__ = "0.7.0-dev" +__version__ = "0.7.0"