From 265b9ddefc204c2d4e0adf9e8fb4e2232968957a Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Thu, 2 Nov 2023 21:59:50 +0100 Subject: [PATCH] Work on #33 --- bbconf/bbconf.py | 79 +++++++++++++++++++++++++----------------------- bbconf/const.py | 2 ++ bbconf/models.py | 28 +++++++++++++++++ 3 files changed, 71 insertions(+), 38 deletions(-) create mode 100644 bbconf/models.py diff --git a/bbconf/bbconf.py b/bbconf/bbconf.py index 5e325cd..ada24dc 100644 --- a/bbconf/bbconf.py +++ b/bbconf/bbconf.py @@ -37,9 +37,11 @@ DEFAULT_VEC2VEC_MODEL, DEFAULT_REGION2_VEC_MODEL, DRS_ACCESS_URL, + CFG_ACCESS_METHOD_KEY, ) from bbconf.exceptions import MissingConfigDataError, BedBaseConfError from bbconf.helpers import raise_missing_key, get_bedbase_cfg +from bbconf.models import DRSModel, AccessMethod, AccessURL # os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" # to suppress verbose warnings tensorflow from geniml.text2bednn import text2bednn @@ -592,49 +594,50 @@ def get_prefixed_uri(self, postfix: str, remote_class: str = "http") -> str: """ return os.path.join(self.prefix(remote_class), postfix) - def get_bed_drs_metadata(self, object_id: str) -> dict: + # TODO: fix it - url is incorrect + def get_bed_url(self, object_id: str, access_id: str = "https") -> str: """ + Build bed file url using drs standard - - :param object_id: - :return: + :param object_id: digest of the bed file + :param access_id: https or s3 + :return: bed file url """ - bed_metadata = self.bed.retrieve(object_id) - drs_dict = { - "id": object_id, - "size": bed_metadata["file_size"], - "created_time": bed_metadata["created_time"], - "checksums": object_id, - "access_methods": [], - } - # add access method for each remote class - for access_id in self.config[CFG_REMOTE_KEY].keys(): - access_dict = { - "type": "https", - "access_id": access_id, - "access_url": DRS_ACCESS_URL.format( - server_url=self.config["access_methods"][access_id]["server_url"], - object_id=object_id, - access_id=access_id, - ), - } - access_dict["region"] = ( - self.config["access_methods"][access_id]["region"] or None - ) - drs_dict["access_methods"].append(access_dict) - return drs_dict + # access_url = DRS_ACCESS_URL.format( + # server_url=self.config[CFG_ACCESS_METHOD_KEY][access_id]["server_url"], + # object_id=object_id, + # access_id=access_id, + # ) + # return access_url + return os.path.join(self.config[CFG_ACCESS_METHOD_KEY][access_id]["server_url"], self.bed.retrieve(object_id)["bedfile"]["path"]) - def get_bed_url(self, object_id: str, access_id: str) -> str: + def get_bed_drs_metadata(self, object_id: str) -> DRSModel: """ + Get DRS metadata for a bed file - - :param object_id: - :param access_id: - :return: + :param object_id: record identifier + :return: DRS metadata """ - access_url = DRS_ACCESS_URL.format( - server_url=self.config["access_methods"][access_id]["server_url"], - object_id=object_id, - access_id=access_id, + bed_metadata = self.bed.retrieve(object_id) + access_methods = [] + for access_id in self.config[CFG_ACCESS_METHOD_KEY].keys(): + access_dict = AccessMethod( + type=access_id, + access_id=access_id, + access_url=AccessURL(url=self.get_bed_url(object_id, access_id)), + region=self.config[CFG_ACCESS_METHOD_KEY][access_id].get( + "region", None + ), + ) + access_methods.append(access_dict) + + drs_dict = DRSModel( + id=object_id, + size=bed_metadata["bedfile"]["size"], + created_time=bed_metadata["pipestat_created_time"], + updated_time=bed_metadata["pipestat_modified_time"], + checksums=object_id, + access_methods=access_methods, ) - return access_url + + return drs_dict diff --git a/bbconf/const.py b/bbconf/const.py index dd33feb..26ef011 100644 --- a/bbconf/const.py +++ b/bbconf/const.py @@ -64,6 +64,8 @@ DB_DEFAULT_PORT = 5432 DB_DEFAULT_DIALECT = "postgresql" +CFG_ACCESS_METHOD_KEY = "access_methods" + DEFAULT_QDRANT_HOST = "localhost" DEFAULT_QDRANT_PORT = 6333 DEFAULT_QDRANT_COLLECTION_NAME = "bedbase" diff --git a/bbconf/models.py b/bbconf/models.py new file mode 100644 index 0000000..299a3d5 --- /dev/null +++ b/bbconf/models.py @@ -0,0 +1,28 @@ +import datetime +from typing import Optional, List + +from pydantic import BaseModel, Field + + +class AccessURL(BaseModel): + url: str + headers: Optional[dict] = None + + +class AccessMethod(BaseModel): + type: str + access_url: Optional[AccessURL] = None + access_id: Optional[str] = None + region: Optional[str] = None + + +class DRSModel(BaseModel): + id: str + name: Optional[str] + self_uri: str + size: str + created_time: Optional[datetime.datetime] + updated_time: Optional[datetime.datetime] + checksums: str + access_methods: List[AccessMethod] + description: Optional[str] = None