diff --git a/.github/actions/setup-bfabricpy/action.yml b/.github/actions/setup-bfabricpy/action.yml new file mode 100644 index 00000000..42de3bea --- /dev/null +++ b/.github/actions/setup-bfabricpy/action.yml @@ -0,0 +1,18 @@ +name: "Setup bfabricPy" +description: "Set up bfabricPy for use in GitHub Actions" +inputs: + python-version: + description: "Python version to use" + required: true +runs: + using: "composite" + steps: + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + - name: Install bfabricPy + shell: bash + run: | + pip install uv + uv pip install --system ".[dev]" diff --git a/.github/workflows/run_functional_test.yml b/.github/workflows/run_functional_test.yml index 45a963f9..dea4b6df 100644 --- a/.github/workflows/run_functional_test.yml +++ b/.github/workflows/run_functional_test.yml @@ -5,12 +5,12 @@ name: functionalTest concurrency: functional-test-group on: - push: - branches: [ main ] - schedule: - - cron: "00 08 * * *" - pull_request: - branches: [ main ] + #push: + # branches: [ main ] + #schedule: + # - cron: "00 08 * * *" + #pull_request: + # branches: [ main ] workflow_dispatch: jobs: diff --git a/.github/workflows/run_unit_tests.yml b/.github/workflows/run_unit_tests.yml index 85543fd4..d4df8852 100644 --- a/.github/workflows/run_unit_tests.yml +++ b/.github/workflows/run_unit_tests.yml @@ -1,4 +1,4 @@ -name: unit tests +name: PR Checks on: push: @@ -8,15 +8,45 @@ on: workflow_dispatch: jobs: - build: + unit_tests: + name: Unit Tests runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: ./.github/actions/setup-bfabricpy with: python-version: 3.9 - - name: Install bfabricPy - run: python -m pip install . 
      - name: Run unit tests
-        run: python -m unittest discover -s bfabric/tests/unit -p 'test_*.py'
+        # Note: we use cd to double-check that the installation actually worked
+        run: cd bfabric/tests && python -m unittest discover -s ./unit -p 'test_*.py'
+  code_style:
+    name: Code Style
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-bfabricpy
+        with:
+          python-version: 3.9
+      - name: Check code with ruff
+        run:
+          ruff bfabric || true
+  list_todos:
+    name: List TODOs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - run: sudo apt-get install -y ripgrep
+        name: Install ripgrep
+      - run:
+          rg -n TODO bfabric
+        name: List TODOs
+  license_check:
+    name: License Check
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup-bfabricpy
+        with:
+          python-version: 3.9
+      - name: Check licenses
+        run: licensecheck
diff --git a/.gitignore b/.gitignore
index 5c4e49d1..d6656746 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,6 @@
 .idea/
+__pycache__
 bfabric.egg-info/
-bfabric/__pycache__/
 bfabric/scripts/query_result.txt
+build/
 dist/
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 95084b27..00000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,2 +0,0 @@
-recursive-include bfabric/scripts bfabric_delete.py bfabric_flask.py bfabric_read.py bfabric_save_resource.py
-include README.md setup.py requirements.txt
diff --git a/Makefile b/Makefile
index a8837c76..c6d3000e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,6 @@
-test: test_workunit test_read
+.PHONY: install install_dev clean
+
+test: test_read
 
 test_workunit:
 	python3 -m unittest -v bfabric/tests/test_bfabric_workunit.py
@@ -6,12 +8,11 @@ test_workunit:
 test_read:
 	cd bfabric/tests && python3 -m unittest -v test_bfabric_read.py
 
-install: test build
-	sudo pip3 install dist/bfabric*.gz -e .
-
-build: clean
-	python3 setup.py sdist
+install:
+	pip install -e .
 
+install_dev:
+	pip install -e ".[dev]"
 
 clean:
 	rm -vf dist/*
diff --git a/README.md b/README.md
index 423430f3..4fd37d38 100644
--- a/README.md
+++ b/README.md
@@ -1,44 +1,86 @@
-![functionTest](https://github.com/fgcz/bfabricPy/workflows/functionalTest/badge.svg)
+![PR Checks](https://github.com/fgcz/bfabricPy/workflows/PR%20Checks/badge.svg)
 [![EDBT'10](https://img.shields.io/badge/EDBT-10.1145%2F1739041.1739135-brightgreen)](https://doi.org/10.1145/1739041.1739135)
 [![JIB](https://img.shields.io/badge/JIB-10.1515%2Fjib.2022.0031-brightgreen)](https://doi.org/10.1515/jib-2022-0031)
-![Downloads](https://img.shields.io/github/downloads/fgcz/bfabricPy/total)
-
-
 # bfabricPy
-
 This package connects the [bfabric](https://fgcz-bfabric.uzh.ch/bfabric/) system to the [python](https://www.python.org/) and [R](https://cran.r-project.org/) world while providing a JSON and REST interface using [Flask](https://www.fullstackpython.com). The [bfabricShiny](https://github.com/cpanse/bfabricShiny) R package is an extension and provides code snippets and sample implementations for a seamless R Shiny bfabric integration. For more advanced users, the *bfabricPy* package also provides a powerful query interface on the command line through the provided scripts.
 
+## Install
+The package can be installed like any other Python package, so if you are familiar with the process you might not need to read this section.
+Currently, it's only available from GitHub.
-![bfabricPy-read](https://user-images.githubusercontent.com/4901987/65025926-db77c900-d937-11e9-8c92-f2412d6793ee.gif)
-[see also #14](https://github.com/fgcz/bfabricPy/issues/14)
+
+The best way to install the package depends on your use case, i.e. whether you want to:
 
-## Requirements
+1. Use the command line scripts
+2. Use the Python API
+3. Develop on the package
 
-- install current stable Debian Linux release (any current BSD like or Microsoft OS will do)
+The command line scripts are currently included in all cases.
 
-- install the python3 package as follows:
+### Command line scripts
+To use the command line scripts, it's recommended to install `bfabricPy` with [pipx](https://pipx.pypa.io/).
+If you don't have `pipx` installed, refer to the [pipx documentation](https://pipx.pypa.io/stable/installation/) for instructions.
 
-```{bash}
-git clone git@github.com:fgcz/bfabricPy.git \
-  && cd bfabricPy
+You can execute a command using a specific version of `bfabricPy` with the `pipx run` command.
+This command handles the dependencies of multiple concurrent installations:
+
+```bash
+pipx run --spec "git+https://github.com/fgcz/bfabricPy.git@0.13.8" bfabric_read.py --help
 ```
 
-## Install
+To install a specific version of bfabricPy on your system and make the command available without the `pipx run` prefix, use the following command:
+```bash
+pipx install "git+https://github.com/fgcz/bfabricPy.git@0.13.8"
+bfabric_read.py --help
+```
+
+### Python API
+If you're interested in using the Python API of `bfabricPy`, you have two options:
+
+#### 1. Configure it in your `pyproject.toml` file.
+```toml
+[project]
+dependencies = [
+    "bfabricPy @ git+https://github.com/fgcz/bfabricPy.git@main"
+]
+```
+
+#### 2. Install the `bfabricPy` package directly using pip.
+```bash
+pip install git+https://github.com/fgcz/bfabricPy.git
+```
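+
+For orientation, here is a minimal usage sketch of the Python API (the endpoint and query values below are placeholders; the `~/.bfabricpy.yml` file described in the Configuration section is required):
+
+```python
+from bfabric import Bfabric
+
+# reads credentials and the default environment from ~/.bfabricpy.yml
+client = Bfabric.from_config()
+
+# query up to 10 samples; a result must match all attributes in the query
+samples = client.read(endpoint="sample", obj={"containerid": 3000}, max_results=10)
+print(samples.to_list_dict())
+```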
+
+### Development
+As a bfabricPy developer, use an editable install:
 
 ```{bash}
-python3 setup.py install --user
+pip install -e ".[dev]"
 ```
 
 ## Configuration
+Create a configuration file as follows (note: the password is your web service password, not your login password):
+
+```{yaml}
+# ~/.bfabricpy.yml
+
+GENERAL:
+  default_config: PRODUCTION
+
+PRODUCTION:
+  login: yourBfabricLogin
+  password: yourBfabricWebPassword
+  base_url: https://fgcz-bfabric.uzh.ch/bfabric
+```
 
-```{bash}
-cat ~/.bfabricrc.py
-_WEBBASE="https://fgcz-bfabric-test.uzh.ch/bfabric"
-_LOGIN="yourBfabricLogin"
-_PASSWD='yourBfabricWebPassword'
+You can also include an additional configuration for the TEST instance:
+
+```{yaml}
+TEST:
+  login: yourBfabricLogin
+  password: yourBfabricWebPassword
+  base_url: https://fgcz-bfabric-test.uzh.ch/bfabric
 ```
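+
+To use the TEST instance from the Python API, the environment can be selected when creating the client (a sketch; the environment name must match a section in your config file):
+
+```python
+from bfabric import Bfabric
+
+# selects the TEST section of ~/.bfabricpy.yml instead of the default_config;
+# alternatively, the BFABRICPY_CONFIG_ENV environment variable can be set
+client = Bfabric.from_config(config_env="TEST")
+```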
 
 ## CheatSheet
 
@@ -65,41 +107,50 @@
 bfabric_read.py workunit status failed
 bfabric_read.py resource filechecksum d41d8cd98f00b204e9800998ecf8427e
 ```
 
-call the `python3` interpreter and enter
+Using the Python API:
 
 ```{py}
-import bfabric
+from bfabric import Bfabric
 
-B = bfabric.Bfabric()
+client = Bfabric.from_config()
 
-user = B.read_object(endpoint = 'user', obj={'login': 'cpanse'})
-resource = B.read_object(endpoint = 'resource', obj={'id': 550327 })
+user = client.read(endpoint='user', obj={'login': 'cpanse'})
+resource = client.read(endpoint='resource', obj={'id': 550327})
 ```
 
 ### save
-```
-rv = B.save_object('workunit', {'id': 254063, 'status': 'available'})
-B.print_json(rv)
-# print(rv)
+```{bash}
+bfabric_save_workunit_attribute.py 199387 status available
 ```
 
-### Command line code snippets
-
-remove pending workunits from the past
-```{bash}
- bfabric_read.py workunit status pending \
-  | awk '$2~/cpanse/ && $3~/2015/{print $1}'
-  | fgcz_bfabric_delete_workunits.py
+```{python}
+import json
+rv = client.save('workunit', {'id': 254063, 'status': 'available'})
+print(json.dumps(rv.to_list_dict(), indent=2))
 ```
 
-find empty resource files in bfabric
+### Command line code snippet
+Find empty resource files in bfabric:
 ```{bash}
 bfabric_read.py resource filechecksum `md5sum < /dev/null | cut -c-32` \
   | cat -n \
   | tail
 ```
 
-## Examples
+## Testing
+Please be advised that integration tests will write to the `TEST` instance configured in your `~/.bfabricpy.yml` config file.
+
+Run unit tests:
+```{bash}
+python3 -m unittest discover -s "bfabric/tests/unit"
+```
+
+Run integration tests (see note above):
+```{bash}
+python3 -m unittest discover -s "bfabric/tests/integration"
+```
+
+## Examples [outdated]
 
 ### bash script generated by the yaml wrapper creator / submitter
@@ -258,7 +309,7 @@ bfabric_read.py importresource \
 done
 ```
 
-## Send an E-mail
+## Send an E-mail [outdated]
 
 ```
 # by CT,CP
@@ -272,11 +323,7 @@ rv = B.save_object(endpoint = 'mail',
 # shown as mail for user id 482
 ```
 
-## Testing
-```{sh}
-cd bfabric/tests/ && python3 -m unittest discover; echo $?; cd -
-```
 
 ## See also
 
@@ -287,25 +334,20 @@
 
 ## FAQ
 
-### How to resolve `
diff --git a/bfabric/bfabric.py b/bfabric/bfabric.py
   Christian Panse
-
-History
-    The python3 library first appeared in 2014.
+ Leonardo Schwarz + Aleksejs Fomins """ -from typing import Dict, Any +from __future__ import annotations -import yaml -import json +import base64 +import importlib.metadata +import logging import sys +from contextlib import contextmanager +from datetime import datetime +from enum import Enum +from pathlib import Path from pprint import pprint +from typing import Literal, ContextManager, Any -from bfabric.bfabric_config import BfabricAuth, BfabricConfig, parse_bfabricrc_py -from suds.client import Client -from suds.wsdl import Service +from rich.console import Console -import hashlib -import os -import base64 -import datetime -import logging.config - -logging.config.dictConfig({ - 'version': 1, - 'formatters': { - 'verbose': { - 'format': 'DEBUG %(name)s: %(message)s' - } - }, - 'handlers': { - 'console': { - 'level': 'DEBUG', - 'class': 'logging.StreamHandler', - 'formatter': 'verbose', - }, - }, - 'loggers': { - 'zeep.transports': { - 'level': 'DEBUG', - 'propagate': True, - 'handlers': ['console'], - }, - } -}) - -import bfabric.gridengine as gridengine -import bfabric.slurm as slurm - - -class bfabricEncoder(json.JSONEncoder): - """ - Implements json encoder for the Bfabric.print_json method - """ - def default(self, o): - try: - return dict(o) - except TypeError: - pass - else: - return list(o) - return JSONEncoder.default(self, o) +from bfabric.bfabric_config import BfabricAuth, read_config +from bfabric.bfabric_config import BfabricConfig +from bfabric.cli_formatting import HostnameHighlighter, DEFAULT_THEME +from bfabric.engine.engine_suds import EngineSUDS +from bfabric.engine.engine_zeep import EngineZeep +from bfabric.results.result_container import ResultContainer +from bfabric.utils.paginator import compute_requested_pages, BFABRIC_QUERY_LIMIT -class Bfabric(object): - """B-Fabric python3 module - Implements read and save object methods for B-Fabric wsdl interface - """ - def warning(self, msg): - sys.stderr.write("\033[93m{}\033[0m\n".format(msg)) +class BfabricAPIEngineType(Enum): + """Choice of engine to use.""" - def __init__(self, login=None, password=None, webbase=None, externaljobid=None, bfabricrc=None, verbose=False): - self.verbose = verbose + SUDS = 1 + ZEEP = 2 - self.cl = {} - self.verbose = False - self.query_counter = 0 - - bfabricrc = bfabricrc or os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) - if not os.path.isfile(bfabricrc): - self.warning("could not find '.bfabricrc.py' file in home directory.") - self.config = BfabricConfig(base_url=webbase) - self.auth = BfabricAuth(login=login, password=password) - else: - with open(bfabricrc, "r", encoding="utf-8") as file: - config, auth = parse_bfabricrc_py(file) - self.config = config.with_overrides(base_url=webbase) - self.auth = auth if login is None and password is None else BfabricAuth(login=login, password=password) - if not self.auth.login or not self.auth.password: - raise ValueError("login or password missing") - - if self.verbose: - pprint(self.config) +# TODO: How to deal with save-skip fields in Zeep? Does it happen in SUDS? +class Bfabric: + """Bfabric client class, providing general functionality for interaction with the B-Fabric API. + Use `Bfabric.from_config` to create a new instance. + :param config: Configuration object + :param auth: Authentication object (if `None`, it has to be provided using the `with_auth` context manager) + :param engine: Engine to use for the API. Default is SUDS. 
+ :param verbose: Print a system info message to standard error console + """ - msg = f"\033[93m--- webbase {self.config.base_url}; login; {self.auth.login} ---\033[0m\n" - sys.stderr.write(msg) + def __init__( + self, + config: BfabricConfig, + auth: BfabricAuth | None, + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, + verbose: bool = False, + ) -> None: + self.query_counter = 0 + self._config = config + self._auth = auth - def read_object(self, endpoint, obj, page=1, plain=False, idonly=False): + if engine == BfabricAPIEngineType.SUDS: + self.engine = EngineSUDS(base_url=config.base_url) + elif engine == BfabricAPIEngineType.ZEEP: + self.engine = EngineZeep(base_url=config.base_url) + else: + raise ValueError(f"Unexpected engine: {engine}") + + if verbose: + self.print_version_message() + + @classmethod + def from_config( + cls, + config_env: str | None = None, + config_path: str | None = None, + auth: BfabricAuth | Literal["config"] | None = "config", + engine: BfabricAPIEngineType = BfabricAPIEngineType.SUDS, + verbose: bool = False, + ) -> Bfabric: + """Returns a new Bfabric instance, configured with the user configuration file. + If the `config_env` is specified then it will be used, if it is not specified the default environment will be + determined by checking the following in order (picking the first one that is found): + - The `BFABRICPY_CONFIG_ENV` environment variable + - The `default_config` field in the config file "GENERAL" section + :param config_env: Configuration environment to use. If not given, it is deduced as described above. + :param config_path: Path to the config file, in case it is different from default + :param auth: Authentication to use. If "config" is given, the authentication will be read from the config file. + If it is set to None, no authentication will be used. + :param engine: Engine to use for the API. Default is SUDS. + :param verbose: Print a system info message to standard error console """ - A generic method which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. - obj is a python dictionary which contains all the attributes of the endpoint - for the "query". + config, auth_config = get_system_auth(config_env=config_env, config_path=config_path) + auth_used: BfabricAuth | None = auth_config if auth == "config" else auth + return cls(config, auth_used, engine=engine, verbose=verbose) + + @property + def config(self) -> BfabricConfig: + """Returns the config object.""" + return self._config + + @property + def auth(self) -> BfabricAuth: + """Returns the auth object. + :raises ValueError: If authentication is not available """ - return self._perform_request( - endpoint=endpoint, - method="read", - plain=plain, - params=dict(query=obj, idonly=idonly, page=page) - ) - - def readid_object(self, endpoint, obj, page=1, plain=False): + if self._auth is None: + raise ValueError("Authentication not available") + return self._auth + + @contextmanager + def with_auth(self, auth: BfabricAuth) -> ContextManager[Bfabric]: + """Context manager that temporarily (within the scope of the context) sets the authentication for + the Bfabric object to the provided value. This is useful when authenticating multiple users, to avoid accidental + use of the wrong credentials. """ - A generic method which can connect to any endpoint, e.g., workunit, project, order, - externaljob, etc, and returns the object with the requested id. 
- obj is a python dictionary which contains only the id of the endpoint for the "query". + old_auth = self._auth + self._auth = auth + try: + yield + finally: + self._auth = old_auth + + def read( + self, + endpoint: str, + obj: dict[str, Any], + max_results: int | None = 100, + offset: int = 0, + check: bool = True, + return_id_only: bool = False, + ) -> ResultContainer: + """Reads from the specified endpoint matching all specified attributes in `obj`. + By setting `max_results` it is possible to change the number of results that are returned. + :param endpoint: the endpoint to read from, e.g. "sample" + :param obj: a dictionary containing the query, for every field multiple possible values can be provided, the + final query requires the condition for each field to be met + :param max_results: cap on the number of results to query. The code will keep reading pages until all pages + are read or expected number of results has been reached. If None, load all available pages. + NOTE: max_results will be rounded upwards to the nearest multiple of BFABRIC_QUERY_LIMIT, because results + come in blocks, and there is little overhead to providing results over integer number of pages. + :param offset: the number of elements to skip before starting to return results (useful for pagination, default + is 0 which means no skipping) + :param check: whether to raise an error if the response is not successful + :param return_id_only: whether to return only the ids of the found objects + :return: List of responses, packaged in the results container """ - return self._perform_request( - endpoint=endpoint, - method="readid", - plain=plain, - params=dict(query=obj, page=page) + # Get the first page. + # NOTE: According to old interface, this is equivalent to plain=True + results = self.engine.read(endpoint=endpoint, obj=obj, auth=self.auth, page=1, return_id_only=return_id_only) + n_available_pages = results.total_pages_api + if not n_available_pages: + if check: + results.assert_success() + return results.get_first_n_results(max_results) + + # Get results from other pages as well, if need be + requested_pages, initial_offset = compute_requested_pages( + n_page_total=n_available_pages, + n_item_per_page=BFABRIC_QUERY_LIMIT, + n_item_offset=offset, + n_item_return_max=max_results, ) - - def save_object(self, endpoint, obj, debug=None): + logging.info(f"Requested pages: {requested_pages}") + + # NOTE: Page numbering starts at 1 + response_items = [] + errors = results.errors + page_offset = initial_offset + for i_iter, i_page in enumerate(requested_pages): + if not (i_iter == 0 and i_page == 1): + print(f"-- reading page {i_page} of {n_available_pages}", file=sys.stderr) + results = self.engine.read( + endpoint=endpoint, obj=obj, auth=self.auth, page=i_page, return_id_only=return_id_only + ) + errors += results.errors + + response_items += results[page_offset:] + page_offset = 0 + + result = ResultContainer(response_items, total_pages_api=n_available_pages, errors=errors) + if check: + result.assert_success() + return result.get_first_n_results(max_results) + + def save(self, endpoint: str, obj: dict[str, Any], check: bool = True) -> ResultContainer: + """Saves the provided object to the specified endpoint. + :param endpoint: the endpoint to save to, e.g. "sample" + :param obj: the object to save + :param check: whether to raise an error if the response is not successful + :return a ResultContainer describing the saved object if successful """ - same as read_object above but uses the save method. 
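+        # Usage sketch (hypothetical IDs, mirroring the README example):
+        #   client = Bfabric.from_config()
+        #   result = client.save("workunit", {"id": 254063, "status": "available"})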
+ results = self.engine.save(endpoint=endpoint, obj=obj, auth=self.auth) + if check: + results.assert_success() + return results + + def delete(self, endpoint: str, id: int | list[int], check: bool = True) -> ResultContainer: + """Deletes the object with the specified ID from the specified endpoint. + :param endpoint: the endpoint to delete from, e.g. "sample" + :param id: the ID of the object to delete + :param check: whether to raise an error if the response is not successful + :return a ResultContainer describing the deleted object if successful """ - return self._perform_request( - endpoint=endpoint, - method="save", - plain=debug is not None, - params={endpoint: obj} - ) - - def checkandinsert_object(self, endpoint, obj, debug=None): - """ - wsdl method to check iff dependencies are fulfilled + results = self.engine.delete(endpoint=endpoint, id=id, auth=self.auth) + if check: + results.assert_success() + return results + + def exists( + self, endpoint: str, key: str, value: int | str, query: dict[str, Any] | None = None, check: bool = True + ) -> bool: + """Returns whether an object with the specified key-value pair exists in the specified endpoint. + Further conditions can be specified in the query. + :param endpoint: the endpoint to check, e.g. "sample" + :param key: the key to check, e.g. "id" + :param value: the value to check, e.g. 123 + :param query: additional query conditions (optional) + :param check: whether to raise an error if the response is not successful """ - # TODO This method was changed a while ago to use the "save"endpoint, which makes it functionally identical - # to the save_object method. Check if this was intended. - return self._perform_request( - endpoint=endpoint, - method="save", - plain=debug is not None, - params={endpoint: obj} + query = query or {} + results = self.read( + endpoint=endpoint, obj={**query, key: value}, max_results=1, check=check, return_id_only=True ) - - def delete_object(self, endpoint, id=None, debug=None): - """ - same as read_object above but uses the delete method. + return len(results) > 0 + + def upload_resource( + self, resource_name: str, content: bytes, workunit_id: int, check: bool = True + ) -> ResultContainer: + """Uploads a resource to B-Fabric, only intended for relatively small files that will be tracked by B-Fabric + and not one of the dedicated experimental data stores. 
+ :param resource_name: the name of the resource to create (the same name can only exist once per workunit) + :param content: the content of the resource as bytes + :param workunit_id: the workunit ID to which the resource belongs + :param check: whether to check for errors in the response """ - return self._perform_request( - endpoint=endpoint, - method="delete", - plain=debug is not None, - params=dict(id=id) + content_encoded = base64.b64encode(content).decode() + return self.save( + endpoint="resource", + obj={ + "base64": content_encoded, + "name": resource_name, + "description": "base64 encoded file", + "workunitid": workunit_id, + }, + check=check, ) - def upload_file(self, filename, workunitid): - with open(filename, 'rb') as f: - content = f.read() - - resource_base64 = base64.b64encode(content).decode() - - res = self.save_object('resource', {'base64': resource_base64, - 'name': os.path.basename(filename), - 'description': "base64 encoded file", - 'workunitid': workunitid}) - - return res - - def _get_service(self, endpoint: str) -> Service: - """Returns a `suds.client.Service` object for the given endpoint name.""" - if endpoint not in self.cl: - self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) - return self.cl[endpoint].service - - def _perform_request( - self, endpoint: str, method: str, plain: bool, params: Dict[str, Any] - ) -> Any: - """Performs a request to the given endpoint and returns the result.""" - self.query_counter += 1 - request_params = dict(login=self.auth.login, password=self.auth.password, **params) - service = self._get_service(endpoint=endpoint) - response = getattr(service, method)(request_params) - if plain: - return response - elif getattr(response, "entitiesonpage", None) == 0: - return [] - return getattr(response, endpoint) - - @staticmethod - def print_json(queryres=None): - """ - This method prints the query result as returned by ``read_object`` in JSON format. - - Parameter - --------- - - queryres : the object returned by ``read_object`` method. - """ - if queryres is None: - raise TypeError("print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json") - - res = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True, indent=2) - print(res) - - @staticmethod - def print_yaml(queryres=None): - """ - This method prints the query result as returned by ``read_object`` in YAML format. - - Parameter - --------- - - queryres : the object returned by ``read_object`` method. - """ - if queryres is None: - raise TypeError("print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml") - - res_json = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True) - res = yaml.dump(res_json, default_flow_style=False, encoding=None, default_style=None) - print(res) - - def get_sampleid(self, resourceid=None): - """ - determines the sample_id of a given resource_id. - it performs a recursive dfs. 
- TODO(cp): check if the method should be implemented using a stack - - :param resourceid: - :return: (int, int) - """ - - assert isinstance(resourceid, int) - - try: - resource = self.read_object('resource', obj={'id': resourceid})[0] - except: - return (None) - - try: - workunit = self.read_object(endpoint='workunit', obj={'id': resource.workunit._id})[0] - return (self.get_sampleid(resourceid=int(workunit.inputresource[0]._id))) - except: - self.warning("fetching sampleid of resource.workunitid = {} failed.".format(resource.workunit._id)) - return (None) - -class BfabricFeeder(Bfabric): - """ - this class is used for reporting 'resource' status - """ - - def report_resource(self, resourceid): - """ - this function determines the 'md5 checksum', 'the file size', - and set the status of the resource available. - - this is gonna executed on the storage host - - """ - res = self.read_object('resource', {'id': resourceid})[0] - print (res) - - if not hasattr(res, 'storage'): - return -1 - - storage = self.read_object('storage', {'id': res.storage._id})[0] - - filename = "{0}/{1}".format(storage.basepath, res.relativepath) - - if os.path.isfile(filename): - try: - fmd5 = hashlib.md5(open(filename, 'rb').read()).hexdigest() - print ("md5sum ({}) = {}".format(filename, fmd5)) - - fsize = int(os.path.getsize(filename)) + 1 - print ("size ({}) = {}".format(filename, fsize)) - - - return self.save_object('resource', {'id': resourceid, - 'size': fsize, - 'status': 'available', - 'filechecksum': fmd5}) - except: - print ("computing md5 failed") - # print ("{} {}".format(Exception, err)) - raise - - return self.save_object('resource', {'id': resourceid, 'status': 'failed'}) - - -class BfabricExternalJob(Bfabric): - """ - ExternalJobs can use logging. - if you have a valid externaljobid use this class instead of - using Bfabric. 
- - - TODO check if an external job id is provided - """ - externaljobid = None - - def __init__(self, login=None, password=None, externaljobid=None): - super(BfabricExternalJob, self).__init__(login, password) - if not externaljobid: - print("Error: no externaljobid provided.") - raise - else: - self.externaljobid = externaljobid - - print(("BfabricExternalJob externaljobid={}".format(self.externaljobid))) - - def logger(self, msg): - if self.externaljobid: - super(BfabricExternalJob, self).save_object('externaljob', {'id': self.externaljobid, 'logthis': str(msg)}) - else: - print((str(msg))) - - def save_object(self, endpoint, obj, debug=None): - res = super(BfabricExternalJob, self).save_object(endpoint, obj, debug) - jsonres = json.dumps(res, cls=bfabricEncoder, sort_keys=True, indent=2) - self.logger('saved ' + endpoint + '=' + str(jsonres)) - return res - - def get_workunitid_of_externaljob(self): - print(("DEBUG get_workunitid_of_externaljob self.externaljobid={}".format(self.externaljobid))) - res = self.read_object(endpoint='externaljob', obj={'id': self.externaljobid})[0] - print(res) - print("DEBUG END") - workunit_id = None - try: - workunit_id = res.cliententityid - print(("workunitid={}".format(workunit_id))) - except: - pass - return workunit_id - - def get_application_name(self): - workunitid = self.get_workunitid_of_externaljob() - if workunitid is None: - raise ValueError("no workunit available for the given externaljobid.") - workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] - if workunit is None: - raise ValueError("ERROR: no workunit available for the given externaljobid.") - assert isinstance(workunit._id, int) - application = self.read_object('application', obj={'id': workunit.application._id})[0] - return application.name.replace(' ', '_') - - - def get_executable_of_externaljobid(self): - """ - It takes as input an `externaljobid` and fetches the the `executables` - out of the bfabric system using wsdl into a file. - returns a list of executables. - - todo: this function should check if base64 is provided or - just a program. - """ - workunitid = self.get_workunitid_of_externaljob() - if workunitid is None: - return None - - executables = list() - for executable in self.read_object(endpoint='executable', obj={'workunitid': workunitid}): - if hasattr(executable, 'base64'): - executables.append(executable) - - return executables if len(executables) > 0 else None - - -class BfabricSubmitter(): - """ - the class is used by the submitter which is executed by the bfabric system. 
- """ - - (G, B) = (None, None) - - workunitid = None - workunit = None - parameters = None - execfilelist = [] - slurm_dict = {"MaxQuant_textfiles_sge" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"1G"}, - "fragpipe" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"256G"}, - "MaxQuant" : {'partition': "maxquant", 'nodelist': "fgcz-r-033", 'memory':"4G"}, - "scaffold_generic" : {'partition': "scaffold", 'nodelist': "fgcz-r-033", 'memory':"256G"}, - "MSstats dataProcess" : {'partition': "prx", 'nodelist': "fgcz-r-033", 'memory':"64G"}, - "MaxQuant_sampleSizeEstimation" : {'partition': "prx", 'nodelist': "fgcz-r-028", 'memory': "2G"}, - "ProteomeDiscovererQC" : {'partition': "prx", 'nodelist': "fgcz-r-035", 'memory': "2G"} - } - - def __init__(self, login=None, password=None, externaljobid=None, - user='*', node="PRX@fgcz-r-018", partition="prx", nodelist="fgcz-r-028", memory="10G", SCHEDULEROOT='/export/bfabric/bfabric/', scheduler="GridEngine"): - """ - :rtype : object - """ - self.B = BfabricExternalJob(login=login, password=password, externaljobid=externaljobid) - self.partition = partition - self.nodelist = nodelist - self.memory = memory - self.SCHEDULEROOT = SCHEDULEROOT - self.user = user - self.scheduler = scheduler - - print(self.B.auth.login) - print(self.B.externaljobid) - - self.workunitid = self.B.get_workunitid_of_externaljob() - - try: - self.workunit = self.B.read_object(endpoint='workunit', obj={'id': self.workunitid})[0] - except: - print ("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") - raise - - - try: - self.parameters = [self.B.read_object(endpoint='parameter', obj={'id': x._id})[0] for x in self.workunit.parameter] - except: - self.parameters = list() - print ("Warning: could not fetch parameter.") - - partition = [x for x in self.parameters if x.key == "partition"] - nodelist = [x for x in self.parameters if x.key == "nodelist"] - memory = [x for x in self.parameters if x.key == "memory"] - application_name = self.B.get_application_name() - - if len(partition) > 0 and len(nodelist) > 0 and len(memory)>0: - self.partition = partition[0].value - self.nodelist = nodelist[0].value - self.memory = memory[0].value - elif "queue" in [x.key for x in self.parameters] and application_name in self.slurm_dict: - # Temporary check for old workunit previously run with SGE - self.partition = self.slurm_dict[application_name]['partition'] - self.nodelist = self.slurm_dict[application_name]['nodelist'] - self.memory = self.slurm_dict[application_name]['memory'] - else: - pass - - print(("partition={0}".format(self.partition))) - print(("nodelist={0}".format(self.nodelist))) - print(("memory={0}".format(self.memory))) - print("__init__ DONE") - - - def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): - - GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.SCHEDULEROOT) - - print(script) - print((type(script))) - resQsub = GE.qsub(script=script, arguments=arguments) - - self.B.logger("{}".format(resQsub)) - - - def submit_slurm(self, script="/tmp/runme.bash", arguments=""): - - SL = slurm.SLURM(user=self.user, SLURMROOT=self.SCHEDULEROOT) - - print(script) - print((type(script))) - resSbatch = SL.sbatch(script=script, arguments=arguments) - - self.B.logger("{}".format(resSbatch)) - - - def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)): - """ - composes the bash script which is executed by the submitter (sun grid engine). 
- as argument it takes a configuration file, e.g., yaml, xml, json, or whatsoever, and a parser function. - - it returns a str object containing the code. - - :rtype : str - """ - - - #assert isinstance(configuration, str) - - try: - config = configuration_parser(configuration) - except: - raise ValueError("error: parsing configuration content failed.") - - - _cmd_template = """#!/bin/bash -# Maria d'Errico -# Christian Panse -# 2020-09-28 -# 2020-09-29 -# https://GitHub.com/fgcz/bfabricPy/ -# Slurm -#SBATCH --partition={0} -#SBATCH --nodelist={11} -#SBATCH -n 1 -#SBATCH -N 1 -#SBATCH --cpus-per-task=1 -#SBATCH --mem-per-cpu={12} -#SBATCH -e {1} -#SBATCH -o {2} -#SBATCH --job-name=WU{10} -#SBATCH --workdir=/home/bfabric -#SBATCH --export=ALL,HOME=/home/bfabric - -# Grid Engine Parameters -#$ -q {0}&{11} -#$ -e {1} -#$ -o {2} - - -set -e -set -o pipefail - -export EMAIL="{job_notification_emails}" -export EXTERNALJOB_ID={3} -export RESSOURCEID_OUTPUT={4} -export RESSOURCEID_STDOUT_STDERR="{5} {6}" -export OUTPUT="{7}" -export WORKUNIT_ID="{10}" -STAMP=`/bin/date +%Y%m%d%H%M`.$$.$JOB_ID -TEMPDIR="/home/bfabric/prx" - -_OUTPUT=`echo $OUTPUT | cut -d"," -f1` -test $? -eq 0 && _OUTPUTHOST=`echo $_OUTPUT | cut -d":" -f1` -test $? -eq 0 && _OUTPUTPATH=`echo $_OUTPUT | cut -d":" -f2` -test $? -eq 0 && _OUTPUTPATH=`dirname $_OUTPUTPATH` -test $? -eq 0 && ssh $_OUTPUTHOST "mkdir -p $_OUTPUTPATH" -test $? -eq 0 && echo $$ > $TEMPDIR/$$ -test $? -eq 0 && scp $TEMPDIR/$$ $OUTPUT - -if [ $? -eq 1 ]; -then - echo "writting to output url failed!"; - exit 1; -fi - -# job configuration set by B-Fabrics wrapper_creator executable -# application parameter/configuration -cat > $TEMPDIR/config_WU$WORKUNIT_ID.yaml < $TEMPDIR/$JOB_ID.bash - - (who am i; hostname; uptime; echo $0; pwd; ps;) \ - | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID" $EMAIL \ - -a $TEMPDIR/$JOB_ID.bash $TEMPDIR/config_WU$WORKUNIT_ID.yaml -fi -# exit 0 - -# run the application -test -f $TEMPDIR/config_WU$WORKUNIT_ID.yaml && {9} $TEMPDIR/config_WU$WORKUNIT_ID.yaml - - -if [ $? -eq 0 ]; -then - ssh fgcz-r-035.uzh.ch "bfabric_setResourceStatus_available.py $RESSOURCEID_OUTPUT" \ - | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID DONE" $EMAIL - - bfabric_save_workflowstep.py $WORKUNIT_ID - bfabric_setExternalJobStatus_done.py $EXTERNALJOB_ID - bfabric_setWorkunitStatus_available.py $WORKUNIT_ID - echo $? 
-else - echo "application failed" - mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID failed" $EMAIL < /dev/null - bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR $RESSOURCEID; - exit 1; -fi - -# should be available also as zero byte files -bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR - - -exit 0 -""".format(self.partition, - config['job_configuration']['stderr']['url'], - config['job_configuration']['stdout']['url'], - config['job_configuration']['external_job_id'], - config['job_configuration']['output']['resource_id'], - config['job_configuration']['stderr']['resource_id'], - config['job_configuration']['stdout']['resource_id'], - ",".join(config['application']['output']), - configuration, - config['job_configuration']['executable'], - config['job_configuration']['workunit_id'], - self.nodelist, - self.memory, - job_notification_emails=self.B.config.job_notification_emails) - - return _cmd_template - - - def submitter_yaml(self): - """ - implements the default submitter - - the function fetches the yaml base64 configuration file linked to the external job id out of the B-Fabric - system. Since the file can not be stagged to the LRMS as argument, we copy the yaml file into the bash script - and stage it on execution the application. - - TODO(cp): create the output url before the application is started. + def get_version_message(self) -> str: + """Returns the version message as a string.""" + package_version = importlib.metadata.version("bfabric") + year = datetime.now().year + engine_name = self.engine.__class__.__name__ + base_url = self.config.base_url + user_name = f"U={self._auth.login if self._auth else None}" + return ( + f"--- bfabricPy v{package_version} ({engine_name}, {base_url}, {user_name}) ---\n" + f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---" + ) - return None + def print_version_message(self, stderr: bool = True) -> None: + """Prints the version message to the console. 
+ :param stderr: Whether to print to stderr (True, default) or stdout (False) """ - - # foreach (executable in external job): - for executable in self.B.get_executable_of_externaljobid(): - self.B.logger("executable = {0}".format(executable)) - - try: - content = base64.b64decode(executable.base64.encode()).decode() - except: - raise ValueError("error: decoding executable.base64 failed.") - - - print(content) - _cmd_template = self.compose_bash_script(configuration=content, - configuration_parser=lambda x: yaml.safe_load(x)) - - _bash_script_filename = "/home/bfabric/prx/workunitid-{0}_externaljobid-{1}_executableid-{2}.bash"\ - .format(self.B.get_workunitid_of_externaljob(), self.B.externaljobid, executable._id) - - with open(_bash_script_filename, 'w') as f: - f.write(_cmd_template) - - if self.scheduler=="GridEngine" : - self.submit_gridengine(_bash_script_filename) - else: - self.submit_slurm(_bash_script_filename) - self.execfilelist.append(_bash_script_filename) - - - res = self.B.save_object(endpoint='externaljob', - obj={'id': self.B.externaljobid, 'status': 'done'}) - def get_job_script(self): - return self.execfilelist - - -class BfabricWrapperCreator(BfabricExternalJob): + console = Console(stderr=stderr, highlighter=HostnameHighlighter(), theme=DEFAULT_THEME) + console.print(self.get_version_message(), style="bright_yellow") + + +def get_system_auth( + login: str = None, + password: str = None, + base_url: str = None, + config_path: str = None, + config_env: str = None, + optional_auth: bool = True, + verbose: bool = False, +) -> tuple[BfabricConfig, BfabricAuth]: """ - the class is used for the wrapper_creator which is executed by the bfabtic system - (non batch) so each resource is processed seperate + :param login: Login string for overriding config file + :param password: Password for overriding config file + :param base_url: Base server url for overriding config file + :param config_path: Path to the config file, in case it is different from default + :param config_env: Which config environment to use. Can also specify via environment variable or use + default in the config file (at your own risk) + :param optional_auth: Whether authentication is optional. If yes, missing authentication will be ignored, + otherwise an exception will be raised + :param verbose: Verbosity (TODO: resolve potential redundancy with logger) """ - (externaljobid_submitter, workunit_executableid) = (None, None) - - def get_externaljobid_yaml_workunit(self): - return self.externaljobid_yaml_workunit - - def uploadGridEngineScript(self, para={'INPUTHOST': 'fgcz-r-035.uzh.ch'}): - """ - the methode creates and uploads an executebale. - """ - - self.warning( - "This python method is superfluously and will be removed. 
Please use the write_yaml method of the BfabricWrapperCreato class.") - - _cmd_template = """#!/bin/bash -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/bfabric.py $ -# $Id: bfabric.py 3000 2017-08-18 14:18:30Z cpanse $ -# Christian Panse -#$ -q PRX@fgcz-r-028 -#$ -e {1} -#$ -o {2} - -set -e -set -o pipefail - - -# debug -hostname -uptime -echo $0 -pwd - -# variables to be set by the wrapper_creator executable -{0} - - -# create output directory -ssh $SSHARGS $OUTPUTHOST "mkdir -p $OUTPUTPATH" || exit 1 - -# staging input and output data and proc -ssh $SSHARGS $INPUTHOST "cat $INPUTPATH/$INPUTFILE" \\ -| $APPLICATION --inputfile $INPUTFILE --ssh "$OUTPUTHOST:$OUTPUTPATH/$OUTPUTFILE" \\ -&& bfabric_setResourceStatus_available.py $RESSOURCEID \\ -&& bfabric_setExternalJobStatus_done.py $EXTERNALJOBID \\ -|| exit 1 - -exit 0 -""".format("\n".join(sorted(['%s="%s"' % (key, info) for key, info in para.iteritems()])), para['STDERR'], - para['STDOUT']) - - resExecutable = self.save_object('executable', {'name': os.path.basename(para['APPLICATION']) + "_executable", - 'context': 'WORKUNIT', - 'parameter': None, - 'description': "This script should run as 'bfabric' user in the FGCZ compute infrastructure.", - 'workunitid': para['WORKUNITID'], - 'base64': base64.b64encode(_cmd_template), - 'version': 0.2}) - - return (resExecutable) - - def get_executableid(self): - return (self.workunit_executableid) - - def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)): - """ - This method writes all related parameters into a yaml file which is than upload as base64 encoded - file into the b-fabric system. - - if the method does not excepted at the end it reports also the status of the external_job. - - TODO(cp): make this function more generic so that it can also export xml, json, yaml, ... 
- """ - - # Inherits all parameters of the application executable out of B-Fabric to create an executable script - workunitid = self.get_workunitid_of_externaljob() - - if workunitid is None: - raise ValueError("no workunit available for the given externaljobid.") - - workunit = self.read_object(endpoint='workunit', obj={'id': workunitid})[0] - if workunit is None: - raise ValueError("ERROR: no workunit available for the given externaljobid.") - - assert isinstance(workunit._id, int) - - application = self.read_object('application', obj={'id': workunit.application._id})[0] - # TODO(cp): rename to application_execuatbel - workunit_executable = self.read_object('executable', obj={'id': workunit.applicationexecutable._id})[0] - try: - self.workunit_executableid = workunit_executable._id - except: - self.workunit_executableid = None - - # Get container details - container = workunit.container - fastasequence = "" - if container._classname=="order": - order = self.read_object('order', obj={'id': container._id})[0] - order_id = order._id - if "project" in order: - project_id = order.project._id - else: - project_id = None - if "fastasequence" in order: - fastasequence = "\n".join([x.strip() for x in str(order.fastasequence).split("\r")]) + have_config_path = config_path is not None + if not have_config_path: + # Get default path config file path + config_path = Path("~/.bfabricpy.yml").expanduser() + else: + config_path = Path(config_path).expanduser() + + + # Use the provided config data from arguments instead of the file + if not config_path.is_file(): + if have_config_path: + # NOTE: If user explicitly specifies a path to a wrong config file, this has to be an exception + raise OSError(f"Explicitly specified config file does not exist: {config_path}") + # TODO: Convert to log + print(f"Warning: could not find the config file in the default location: {config_path}") + config = BfabricConfig(base_url=base_url) + auth = None if login is None and password is None else BfabricAuth(login=login, password=password) + + # Load config from file, override some of the fields with the provided ones + else: + config, auth = read_config(config_path, config_env=config_env) + config = config.copy_with(base_url=base_url) + if (login is not None) and (password is not None): + auth = BfabricAuth(login=login, password=password) + elif (login is None) and (password is None): + auth = auth else: - order_id = None - project_id = container._id - - today = datetime.date.today() - - # merge all information into the executable script - _output_storage = self.read_object('storage', obj={'id': application.storage._id})[0] - - _output_relative_path = "p{0}/bfabric/{1}/{2}/{3}/workunit_{4}/".format( - container._id, - application.technology.replace(' ', '_'), - application.name.replace(' ', '_'), - today.strftime('%Y/%Y-%m/%Y-%m-%d/'), - workunitid) - - # Setup the log_storage to SlurmLog with id 13 - _log_storage = self.read_object('storage', obj={'id': 13})[0] - - #_cmd_applicationList = [workunit_executable.program] - - application_parameter = {} - - if not getattr(workunit, "parameter", None) is None: - for para in workunit.parameter: - parameter = self.read_object('parameter', obj={'id': para._id}) - if parameter: - for p in parameter: - try: - application_parameter["{}".format(p.key)] = "{}".format(p.value) - except: - application_parameter["{}".format(p.key)] = "" - - try: - input_resources = [x._id for x in workunit.inputresource] - input_resources = [self.read_object(endpoint='resource', obj={'id': x})[0] for x in 
input_resources] - except: - print("no input resources found. continue with empty list.") - input_resources = [] - - - # query all urls and ids of the input resources - resource_urls = dict() - resource_ids = dict() - - for resource_iterator in input_resources: - try: - _appication_id = self.read_object(endpoint='workunit', - obj={'id': resource_iterator.workunit._id})[0].application._id - - _application_name = "{0}".format(self.read_object('application', obj={'id': _appication_id})[0].name) - - _storage = self.read_object('storage', {'id': resource_iterator.storage._id})[0] - - _inputUrl = "bfabric@{0}:/{1}/{2}".format(_storage.host, _storage.basepath, resource_iterator.relativepath) - - if not _application_name in resource_urls: - resource_urls[_application_name] = [] - resource_ids[_application_name] = [] - - resource_urls[_application_name].append(_inputUrl) - - sample_id = self.get_sampleid(int(resource_iterator._id)) - - _resource_sample = {'resource_id': int(resource_iterator._id), - 'resource_url': "{0}/userlab/show-resource.html?id={1}".format(self.config.base_url,resource_iterator._id)} - - - if not sample_id is None: - _resource_sample['sample_id'] = int(sample_id) - _resource_sample['sample_url'] = "{0}/userlab/show-sample.html?id={1}".format(self.config.base_url, sample_id) - - resource_ids[_application_name].append(_resource_sample) - except: - print ("resource_iterator failed. continue ...") - pass - - - # create resources for output, stderr, stdout - _ressource_output = self.save_object('resource', { - 'name': "{0} {1} - resource".format(application.name, len(input_resources)), - 'workunitid': workunit._id, - 'storageid': int(application.storage._id), - 'relativepath': _output_relative_path})[0] - - - print(_ressource_output) - _output_filename = "{0}.{1}".format(_ressource_output._id, application.outputfileformat) - # we want to include the resource._id into the filename - _ressource_output = self.save_object('resource', - {'id': int(_ressource_output._id), - 'relativepath': "{0}/{1}".format(_output_relative_path, _output_filename)})[0] - - print (_ressource_output) - _resource_stderr = self.save_object('resource', { - 'name': 'slurm_stderr', - 'workunitid': int(workunit._id), - 'storageid': _log_storage._id, - 'relativepath': "/workunitid-{0}_resourceid-{1}.err".format(workunit._id, _ressource_output._id)})[0] - - _resource_stdout = self.save_object('resource', { - 'name': 'slurm_stdout', - 'workunitid': workunit._id, - 'storageid': _log_storage._id, - 'relativepath': "/workunitid-{0}_resourceid-{1}.out".format(workunit._id, _ressource_output._id)})[0] - - - # Creates the workunit executable - # The config includes the externaljobid: the yaml_workunit_externaljob has to be created before it. - # The yaml_workunit_externaljob cannot be created without specifying an executableid: - # a yaml_workunit_executable is thus created before the config definition in order to provide - # the correct executableid to the yaml_workunit_externaljob. - # However this yaml_workunit_executable has to be updated later to include 'base64': base64.b64encode(config_serialized.encode()).decode() - yaml_workunit_executable = self.save_object('executable', {'name': 'job configuration (executable) in YAML', - 'context': 'WORKUNIT', - 'workunitid': workunit._id, - 'description': "This is a job configuration as YAML base64 encoded. 
It is configured to be executed by the B-Fabric yaml submitter."})[0] - print(yaml_workunit_executable) - - yaml_workunit_externaljob = self.save_object('externaljob', - {"workunitid": workunit._id, - 'status': 'new', - 'executableid' : yaml_workunit_executable._id, - 'action': "WORKUNIT"})[0] - print(yaml_workunit_externaljob) - assert isinstance(yaml_workunit_externaljob._id, int) - self.externaljobid_yaml_workunit = int(yaml_workunit_externaljob._id) - print(("XXXXXXX self.externaljobid_yaml_workunit ={} XXXXXXX".format(self.externaljobid_yaml_workunit))) - - _output_url = "bfabric@{0}:{1}{2}/{3}".format(_output_storage.host, - _output_storage.basepath, - _output_relative_path, - _output_filename) - - try: - query_obj = {'id': workunit.inputdataset._id} - inputdataset = self.read_object(endpoint='dataset', obj=query_obj)[0] - inputdataset_json = json.dumps(inputdataset, cls=bfabricEncoder, sort_keys=True, indent=2) - inputdataset = json.loads(inputdataset_json) - except: - inputdataset = None - - # Compose configuration structure - config = { - 'job_configuration': { - 'executable': "{}".format(workunit_executable.program), - 'inputdataset': inputdataset, - 'input': resource_ids, - 'output': { - 'protocol': 'scp', - 'resource_id': int(_ressource_output._id), - 'ssh_args': "-o StrictHostKeyChecking=no -2 -l bfabric -x" - }, - 'stderr': { - 'protocol': 'file', - 'resource_id': int(_resource_stderr._id) , - 'url': "{0}/workunitid-{1}_resourceid-{2}.err".format(_log_storage.basepath, workunit._id, _ressource_output._id) - }, - 'stdout': { - 'protocol': 'file', - 'resource_id': int(_resource_stdout._id), - 'url': "{0}/workunitid-{1}_resourceid-{2}.out".format(_log_storage.basepath, workunit._id, _ressource_output._id) - }, - 'workunit_id': int(workunit._id), - 'workunit_createdby': str(workunit.createdby), - 'workunit_url': "{0}/userlab/show-workunit.html?workunitId={1}".format(self.config.base_url, workunit._id), - 'external_job_id': int(yaml_workunit_externaljob._id), - 'order_id': order_id, - 'project_id': project_id, - 'fastasequence': fastasequence - }, - 'application' : { - 'protocol': 'scp', - 'parameters': application_parameter, - 'input': resource_urls, - 'output': [_output_url] - } - } - - config_serialized = data_serializer(config) - print(config_serialized) - - yaml_workunit_executable = self.save_object('executable', {'id': yaml_workunit_executable._id, - 'base64': base64.b64encode(config_serialized.encode()).decode(), - 'version': "{}".format(10)})[0] - print(yaml_workunit_executable) - - # The WrapperCreator executable is successful, and the status of the its external job is set to done, - # which triggers B-Fabric to create an external job for the submitter executable. 
- - wrapper_creator_externaljob = self.save_object(endpoint='externaljob', - obj={'id': self.externaljobid, 'status': 'done'}) - - print(("\n\nquery_counter={0}".format(self.query_counter))) - + raise OSError("Must provide both username and password, or neither.") + if not config.base_url: + raise ValueError("base_url missing") + if not optional_auth and (not auth or not auth.login or not auth.password): + raise ValueError("Authentication not initialized but required") + if verbose: + pprint(config) -if __name__ == "__main__": - bfapp = Bfabric(verbose=True) + return config, auth diff --git a/bfabric/bfabric2.py b/bfabric/bfabric2.py new file mode 100755 index 00000000..869842f7 --- /dev/null +++ b/bfabric/bfabric2.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +import warnings + +warnings.warn("bfabric.bfabric2 module is deprecated, use bfabric instead", DeprecationWarning) +# TODO deprecated - import from bfabric instead +from bfabric.bfabric import Bfabric, BfabricAPIEngineType, get_system_auth diff --git a/bfabric/bfabric_config.py b/bfabric/bfabric_config.py index a8e2cc2e..d3970dba 100644 --- a/bfabric/bfabric_config.py +++ b/bfabric/bfabric_config.py @@ -1,11 +1,13 @@ from __future__ import annotations -import io -import json +import dataclasses import logging import os -from typing import Optional, Dict, Tuple -import dataclasses +from pathlib import Path + +import yaml + +from bfabric.errors import BfabricConfigError @dataclasses.dataclass(frozen=True) @@ -15,85 +17,181 @@ class BfabricAuth: login: str password: str - def __repr__(self): + def __repr__(self) -> str: return f"BfabricAuth(login={repr(self.login)}, password=...)" - def __str__(self): + def __str__(self) -> str: return repr(self) -@dataclasses.dataclass(frozen=True) class BfabricConfig: """Holds the configuration for the B-Fabric client for connecting to particular instance of B-Fabric. - Attributes: + Parameters: base_url (optional): The API base url application_ids (optional): Map of application names to ids. job_notification_emails (optional): Space-separated list of email addresses to notify when a job finishes. 
""" - base_url: str = "https://fgcz-bfabric.uzh.ch/bfabric" - application_ids: Dict[str, int] = dataclasses.field(default_factory=dict) - job_notification_emails: str = "" - - def with_overrides( + def __init__( + self, + base_url: str | None = None, + application_ids: dict[str, int] = None, + job_notification_emails: str | None = None, + ) -> None: + self._base_url = base_url or "https://fgcz-bfabric.uzh.ch/bfabric" + self._application_ids = application_ids or {} + self._job_notification_emails = job_notification_emails or "" + + @property + def base_url(self) -> str: + """The API base url.""" + return self._base_url + + @property + def application_ids(self) -> dict[str, int]: + """Map of known application names to ids.""" + return self._application_ids + + @property + def job_notification_emails(self) -> str: + """Space-separated list of email addresses to notify when a job finishes.""" + return self._job_notification_emails + + def copy_with( self, - base_url: Optional[str] = None, - application_ids: Optional[Dict[str, int]] = None, + base_url: str | None = None, + application_ids: dict[str, int] | None = None, ) -> BfabricConfig: """Returns a copy of the configuration with new values applied, if they are not None.""" return BfabricConfig( base_url=base_url if base_url is not None else self.base_url, - application_ids=application_ids - if application_ids is not None - else self.application_ids, + application_ids=(application_ids if application_ids is not None else self.application_ids), + job_notification_emails=self.job_notification_emails, ) + def __repr__(self) -> str: + return ( + f"BfabricConfig(base_url={repr(self.base_url)}, application_ids={repr(self.application_ids)}, " + f"job_notification_emails={repr(self.job_notification_emails)})" + ) -def parse_bfabricrc_py(file: io.FileIO) -> Tuple[BfabricConfig, Optional[BfabricAuth]]: - """Parses a .bfabricrc.py file and returns a tuple of BfabricConfig and BfabricAuth objects.""" - values = {} - file_path = os.path.realpath(file.name) + +def _read_config_env_as_dict(config_path: Path, config_env: str | None = None) -> tuple[str, dict]: + """ + Reads and partially parses a bfabricpy.yml file + :param config_path: Path to the configuration file. It is assumed that it exists + :param config_env: Specific environment to parse. If not provided, it is deduced from an environment variable + or the config file itself. + :return: Returns a target environment name, and the corresponding data from bfabricpy.yml file as a dictionary + """ logger = logging.getLogger(__name__) - logger.info(f"Reading configuration from: {file_path}") - - for line in file: - if line.startswith("#"): - continue - - key, _, value = [part.strip() for part in line.partition("=")] - if key not in [ - "_PASSWD", - "_LOGIN", - "_WEBBASE", - "_APPLICATION", - "_JOB_NOTIFICATION_EMAILS", - ]: - continue - - # In case of multiple definitions, the first rule counts! - if key not in values: - if key in ["_APPLICATION"]: - try: - values[key] = json.loads(value) - except json.JSONDecodeError as e: - raise ValueError( - f"While reading {file_path}. '{key}' is not a valid JSON string." - ) from e - else: - # to make it downward compatible; so we replace quotes in login and password - values[key] = value.replace('"', "").replace("'", "") - else: - logger.warning(f"While reading {file_path}. 
'{key}' is already set.") + logger.info(f"Reading configuration from: {config_path}") + + if config_path.suffix != ".yml": + raise OSError(f"Expected config file with .yml extension, got {config_path}") + + # Read the config file + config_dict = yaml.safe_load(config_path.read_text()) + + if "default_config" not in config_dict.get("GENERAL", {}): + raise BfabricConfigError("Config file must provide a `default_config` in the `GENERAL` section") + config_env_default = config_dict["GENERAL"]["default_config"] - args = dict( - base_url=values.get("_WEBBASE"), - application_ids=values.get("_APPLICATION"), - job_notification_emails=values.get("_JOB_NOTIFICATION_EMAILS"), + # Determine which environment we will use + # By default, use the one provided by config_env + config_env = _select_config_env( + explicit_config_env=config_env, config_file_default_config=config_env_default, logger=logger ) - config = BfabricConfig(**{k: v for k, v in args.items() if v is not None}) - if "_LOGIN" in values and "_PASSWD" in values: - auth = BfabricAuth(login=values["_LOGIN"], password=values["_PASSWD"]) + if config_env not in config_dict: + raise BfabricConfigError(f"The requested config environment {config_env} is not present in the config file") + + return config_env, config_dict[config_env] + + +def _select_config_env(explicit_config_env: str | None, config_file_default_config: str, logger: logging.Logger) -> str: + """Selects the appropriate configuration environment to use, based on the provided arguments. + :param explicit_config_env: Explicitly provided configuration environment to use (i.e. from a function argument) + :param config_file_default_config: Default configuration environment to use, as specified in the config file + :param logger: Logger to use for output + """ + if explicit_config_env is None: + config_env = os.getenv("BFABRICPY_CONFIG_ENV") + if config_env is None: + logger.info(f"BFABRICPY_CONFIG_ENV not found, using default environment {config_file_default_config}") + config_env = config_file_default_config + else: + logger.info(f"found BFABRICPY_CONFIG_ENV = {config_env}") else: + config_env = explicit_config_env + logger.info(f"config environment specified explicitly as {config_env}") + return config_env + + +def _have_all_keys(dict_: dict, expected_keys: list) -> bool: + """Returns True if all elements in list l are present as keys in dict d, otherwise false""" + return all(k in dict_ for k in expected_keys) + + +def _parse_dict(d: dict, mandatory_keys: list, optional_keys: list = None, error_prefix: str = " ") -> dict: + """ + Returns a copy of an existing dictionary, only keeping mandatory and optional keys + If a mandatory key is not found, an exception is raised + :param d: Starting dictionary + :param mandatory_keys: A list of mandatory keys + :param optional_keys: A list of optional keys + :param error_prefix: A string to print if a mandatory key is not found + :return: Copy of a starting dictionary, only containing mandatory and optional keys + """ + missing_keys = set(mandatory_keys) - set(d) + if missing_keys: + raise BfabricConfigError(f"{error_prefix}{missing_keys}") + result_keys = set(mandatory_keys) | set(optional_keys or []) + d_rez = {k: d[k] for k in result_keys if k in d} + + # Ignore all other fields + return d_rez + + +def read_config( + config_path: str | Path, + config_env: str = None, +) -> tuple[BfabricConfig, BfabricAuth | None]: + """ + Reads bfabricpy.yml file, parses it, extracting authentication and configuration data + :param config_path: Path to the 
configuration file. It is assumed the file exists + :param config_env: Configuration environment to use. If not given, it is deduced. + :return: Configuration and Authentication class instances + + NOTE: bfabricPy expects a .bfabricpy.yml file of the format shown in bfabricPy/tests/unit/example_config.yml + * The GENERAL section always has to be present + * There may be any number of environments, with arbitrary names. Here, they are called PRODUCTION and TEST + * Each environment must specify a correct login, password and base_url. + * The application_ids and job_notification_emails fields are optional + * The default environment will be selected as follows: + - First, the parser checks if the optional argument `config_env` is provided directly to the parser function + - If not, the parser checks if the environment variable `BFABRICPY_CONFIG_ENV` is declared + - Finally, the parser falls back to the default_config specified in the GENERAL section of the .bfabricpy.yml file + """ + + config_env_final, config_dict = _read_config_env_as_dict(Path(config_path), config_env=config_env) + + error_prefix = f"Config environment {config_env_final} does not have a compulsory field: " + + # Parse authentication + if not _have_all_keys(config_dict, ["login", "password"]): + auth = None + else: + auth_dict = _parse_dict(config_dict, ["login", "password"], error_prefix=error_prefix) + auth = BfabricAuth(**auth_dict) + + # Parse config + config_dict = _parse_dict( + config_dict, + ["base_url"], + optional_keys=["application_ids", "job_notification_emails"], + error_prefix=error_prefix, + ) + config = BfabricConfig(**config_dict) + return config, auth
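For illustration, a minimal .bfabricpy.yml consistent with the parser above (a sketch only: the environment names follow the example in the docstring, while credentials, the TEST base_url, the application_ids entry, and the email addresses are placeholders):

    GENERAL:
      default_config: PRODUCTION

    PRODUCTION:
      login: my_login
      password: my_password
      base_url: https://fgcz-bfabric.uzh.ch/bfabric

    TEST:
      login: my_login
      password: my_password
      base_url: https://bfabric-test.example.org/bfabric
      application_ids:
        Proteomics/DemoApp: 7
      job_notification_emails: user1@example.org user2@example.org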
diff --git a/bfabric/bfabric_legacy.py b/bfabric/bfabric_legacy.py new file mode 100644 index 00000000..aa864324 --- /dev/null +++ b/bfabric/bfabric_legacy.py @@ -0,0 +1,242 @@ +from __future__ import annotations +import base64 +import json +import os +import sys +from pprint import pprint +from typing import Any + +import yaml +from suds.client import Client +from suds.wsdl import Service + +from bfabric import BfabricConfig +from bfabric.bfabric_config import BfabricAuth, read_config + + +class BfabricLegacy: + """B-Fabric python3 module + Implements read and save object methods for B-Fabric wsdl interface + """ + + def warning(self, msg) -> None: + sys.stderr.write(f"\033[93m{msg}\033[0m\n") + + def __init__( + self, + login: str = None, + password: str = None, + base_url: str = None, + externaljobid=None, + config_path: str = None, + config_env: str = None, + optional_auth: bool = False, + verbose: bool = False, + ) -> None: + """ + :param login: Login string for overriding config file + :param password: Password for overriding config file + :param base_url: Base url of the BFabric server for overriding config file + :param externaljobid: ? + :param config_path: Path to the config file, in case it is different from default + :param config_env: Which config environment to use. Can also specify via environment variable or use + default in the config file (at your own risk) + :param optional_auth: Whether authentication is optional. If yes, missing authentication will be ignored, + otherwise an exception will be raised + :param verbose: Verbosity (TODO: resolve potential redundancy with logger) + """ + self.verbose = verbose + + self.cl = {} + self.query_counter = 0 + + # Get default config file path + config_path = config_path or os.path.normpath(os.path.expanduser("~/.bfabricpy.yml")) + + # TODO: Convert to an exception when this branch becomes main + bfabricrc_path = os.path.normpath(os.path.expanduser("~/.bfabricrc.py")) + if os.path.isfile(bfabricrc_path): + self.warning( + "WARNING! The old .bfabricrc.py was found in the home directory. Delete and make sure to use the new .bfabricpy.yml" + ) + + # Use the provided config data from arguments instead of the file + if not os.path.isfile(config_path): + self.warning("could not find '.bfabricpy.yml' file in home directory.") + self.config = BfabricConfig(base_url=base_url) + self.auth = BfabricAuth(login=login, password=password) + + # Load config from file, override some of the fields with the provided ones + else: + config, auth = read_config(config_path, config_env=config_env) + self.config = config.copy_with(base_url=base_url) + if (login is not None) and (password is not None): + self.auth = BfabricAuth(login=login, password=password) + elif (login is None) and (password is None): + self.auth = auth + else: + raise OSError("Must provide both username and password, or neither.") + + if not self.config.base_url: + raise ValueError("base server url missing") + if not optional_auth: + if not self.auth or not self.auth.login or not self.auth.password: + raise ValueError("Authentication not initialized but required") + + if self.auth: + msg = f"\033[93m--- base_url {self.config.base_url}; login: {self.auth.login} ---\033[0m\n" + sys.stderr.write(msg) + + if self.verbose: + pprint(self.config) + + def read_object(self, endpoint, obj, page=1, plain=False, idonly=False): + """ + A generic method which can connect to any endpoint, e.g., workunit, project, order, + externaljob, etc, and returns the object with the requested id. + obj is a python dictionary which contains all the attributes of the endpoint + for the "query". + """ + return self._perform_request( + endpoint=endpoint, method="read", plain=plain, params=dict(query=obj, idonly=idonly, page=page) + ) + + def readid_object(self, endpoint, obj, page=1, plain=False): + """ + A generic method which can connect to any endpoint, e.g., workunit, project, order, + externaljob, etc, and returns the object with the requested id. + obj is a python dictionary which contains only the id of the endpoint for the "query". + """ + return self._perform_request(endpoint=endpoint, method="readid", plain=plain, params=dict(query=obj, page=page)) + + def save_object(self, endpoint, obj, debug=None): + """ + same as read_object above but uses the save method. + """ + return self._perform_request(endpoint=endpoint, method="save", plain=debug is not None, params={endpoint: obj}) + + def checkandinsert_object(self, endpoint, obj, debug=None): + """ + wsdl method to check if dependencies are fulfilled + """ + # TODO This method was changed a while ago to use the "save" endpoint, which makes it functionally identical + # to the save_object method. Check if this was intended.
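+        # Hypothetical usage sketch (placeholder endpoint and IDs, not part of the original docs); because
+        # this method currently posts to "save", it behaves exactly like save_object:
+        #   bf = BfabricLegacy()
+        #   bf.checkandinsert_object(endpoint="workunit", obj={"name": "demo", "containerid": 3000})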
+ return self._perform_request(endpoint=endpoint, method="save", plain=debug is not None, params={endpoint: obj}) + + def delete_object(self, endpoint, id=None, debug=None): + """ + same as read_object above but uses the delete method. + """ + return self._perform_request(endpoint=endpoint, method="delete", plain=debug is not None, params=dict(id=id)) + + def upload_file(self, filename, workunitid): + with open(filename, "rb") as f: + content = f.read() + + resource_base64 = base64.b64encode(content).decode() + + res = self.save_object( + "resource", + { + "base64": resource_base64, + "name": os.path.basename(filename), + "description": "base64 encoded file", + "workunitid": workunitid, + }, + ) + + return res + + def _get_service(self, endpoint: str) -> Service: + """Returns a `suds.client.Service` object for the given endpoint name.""" + if endpoint not in self.cl: + self.cl[endpoint] = Client(f"{self.config.base_url}/{endpoint}?wsdl", cache=None) + return self.cl[endpoint].service + + def _perform_request(self, endpoint: str, method: str, plain: bool, params: dict[str, Any]) -> Any: + """Performs a request to the given endpoint and returns the result.""" + self.query_counter += 1 + request_params = dict(login=self.auth.login, password=self.auth.password, **params) + service = self._get_service(endpoint=endpoint) + response = getattr(service, method)(request_params) + if plain: + return response + elif getattr(response, "entitiesonpage", None) == 0: + return [] + return getattr(response, endpoint) + + @staticmethod + def print_json(queryres=None) -> None: + """ + This method prints the query result as returned by ``read_object`` in JSON format. + + Parameter + --------- + + queryres : the object returned by ``read_object`` method. + """ + if queryres is None: + raise TypeError( + "print_json() missing 1 required positional argument: please provide the output from read_object as parameter to print_json" + ) + + res = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True, indent=2) + print(res) + + @staticmethod + def print_yaml(queryres=None) -> None: + """ + This method prints the query result as returned by ``read_object`` in YAML format. + + Parameter + --------- + + queryres : the object returned by ``read_object`` method. + """ + if queryres is None: + raise TypeError( + "print_yaml() missing 1 required positional argument: please provide the output from read_object as parameter to print_yaml" + ) + + res_json = json.dumps(queryres, cls=bfabricEncoder, sort_keys=True) + res = yaml.dump(res_json, default_flow_style=False, encoding=None, default_style=None) + print(res) + + def get_sampleid(self, resourceid=None): + """ + determines the sample_id of a given resource_id. + it performs a recursive dfs. 
+ TODO(cp): check if the method should be implemented using a stack + + :param resourceid: + :return: (int, int) + """ + + assert isinstance(resourceid, int) + + try: + resource = self.read_object("resource", obj={"id": resourceid})[0] + except: + return None + + try: + workunit = self.read_object(endpoint="workunit", obj={"id": resource.workunit._id})[0] + return self.get_sampleid(resourceid=int(workunit.inputresource[0]._id)) + except: + self.warning(f"fetching sampleid of resource.workunitid = {resource.workunit._id} failed.") + return None + + +class bfabricEncoder(json.JSONEncoder): + """ + Implements json encoder for the Bfabric.print_json method + """ + + def default(self, o): + try: + return dict(o) + except TypeError: + pass + else: + return list(o) + return json.JSONEncoder.default(self, o) diff --git a/bfabric/cli_formatting.py b/bfabric/cli_formatting.py new file mode 100644 index 00000000..b8acc7ff --- /dev/null +++ b/bfabric/cli_formatting.py @@ -0,0 +1,12 @@ +from rich.highlighter import RegexHighlighter +from rich.theme import Theme + + +class HostnameHighlighter(RegexHighlighter): + """Highlights hostnames in URLs.""" + + base_style = "bfabric." + highlights = [r"https://(?P<hostname>[^.]+)"] + + +DEFAULT_THEME = Theme({"bfabric.hostname": "bold red"}) diff --git a/bfabric/deprecated_scripts/__init__.py b/bfabric/deprecated_scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/scripts/bfabric_create_bfabricrc.py b/bfabric/deprecated_scripts/bfabric_create_bfabricrc.py similarity index 100% rename from bfabric/scripts/bfabric_create_bfabricrc.py rename to bfabric/deprecated_scripts/bfabric_create_bfabricrc.py diff --git a/bfabric/scripts/bfabric_csv2dataset.py b/bfabric/deprecated_scripts/bfabric_csv2dataset.py similarity index 100% rename from bfabric/scripts/bfabric_csv2dataset.py rename to bfabric/deprecated_scripts/bfabric_csv2dataset.py diff --git a/bfabric/scripts/bfabric_demo_register_resource.py b/bfabric/deprecated_scripts/bfabric_demo_register_resource.py similarity index 92% rename from bfabric/scripts/bfabric_demo_register_resource.py rename to bfabric/deprecated_scripts/bfabric_demo_register_resource.py index 4fa290fd..dc352156 100755 --- a/bfabric/scripts/bfabric_demo_register_resource.py +++ b/bfabric/deprecated_scripts/bfabric_demo_register_resource.py @@ -6,17 +6,13 @@ # $Date: 2017-06-12 12:55:55 +0200 (Mon, 12 Jun 2017) $ - -import os -import re -import time -import sys import bfabric +import bfabric.wrapper_creator.bfabric_feeder def main(): BFABRICSTORAGEID = 2 - bfapp = bfabric.BfabricFeeder() + bfapp = bfabric.wrapper_creator.bfabric_feeder.BfabricFeeder() # create workunit wuobj = { 'applicationid': 155, diff --git a/bfabric/scripts/bfabric_feeder_importresource.py b/bfabric/deprecated_scripts/bfabric_feeder_importresource.py similarity index 100% rename from bfabric/scripts/bfabric_feeder_importresource.py rename to bfabric/deprecated_scripts/bfabric_feeder_importresource.py diff --git a/bfabric/scripts/bfabric_list_executables.py b/bfabric/deprecated_scripts/bfabric_list_executables.py similarity index 73% rename from bfabric/scripts/bfabric_list_executables.py rename to bfabric/deprecated_scripts/bfabric_list_executables.py index 25ce1a1c..37cdcf7d 100755 --- a/bfabric/scripts/bfabric_list_executables.py +++ b/bfabric/deprecated_scripts/bfabric_list_executables.py @@ -24,6 +24,8 @@ if __name__ == "__main__": bfapp = Bfabric() - res = bfapp.read_object(endpoint='executable', obj={}) - map(lambda x: 
sys.stdout.write("{}\t{}\t{}\t{}\t{}\n" - .format(x._id, x.createdby, x.modified, x.context, x.name)), res) + res = bfapp.read_object(endpoint="executable", obj={}) + map( + lambda x: sys.stdout.write("{}\t{}\t{}\t{}\t{}\n".format(x._id, x.createdby, x.modified, x.context, x.name)), + res, + ) diff --git a/bfabric/scripts/bfabric_list_proteomics_projects.py b/bfabric/deprecated_scripts/bfabric_list_proteomics_projects.py similarity index 100% rename from bfabric/scripts/bfabric_list_proteomics_projects.py rename to bfabric/deprecated_scripts/bfabric_list_proteomics_projects.py diff --git a/bfabric/scripts/bfabric_read_dataset.py b/bfabric/deprecated_scripts/bfabric_read_dataset.py similarity index 89% rename from bfabric/scripts/bfabric_read_dataset.py rename to bfabric/deprecated_scripts/bfabric_read_dataset.py index 1c9631ee..20767918 100755 --- a/bfabric/scripts/bfabric_read_dataset.py +++ b/bfabric/deprecated_scripts/bfabric_read_dataset.py @@ -18,15 +18,18 @@ def signal_handler(signal, frame): - print('You pressed Ctrl+C!') + print("You pressed Ctrl+C!") sys.exit(0) + signal.signal(signal.SIGINT, signal_handler) + def print_color_msg(msg, color="93"): msg = "\033[{color}m--- {} ---\033[0m\n".format(msg, color=color) sys.stderr.write(msg) + def usage(): print("usage:\n") msg = "\t{} ".format(sys.argv[0]) @@ -34,14 +37,15 @@ def usage(): def dataset2csv(ds, sep="\t"): - print (type(ds.attribute)) + print(type(ds.attribute)) # print header - print (sep.join(map(lambda x: x.name, ds.attribute))) + print(sep.join(map(lambda x: x.name, ds.attribute))) # print values for i in ds.item: print(sep.join(map(lambda x: x.value, i.field))) + if __name__ == "__main__": bfapp = bfabric.Bfabric(verbose=False) @@ -50,12 +54,11 @@ def dataset2csv(ds, sep="\t"): query_obj = {} endpoint = "dataset" - if len(sys.argv) == 2: datasetid = sys.argv[1] start_time = time.time() - query_obj = {'id': '32003'} + query_obj = {"id": "32003"} print_color_msg("query = {}".format(query_obj)) res = bfapp.read_object(endpoint=endpoint, obj=query_obj) diff --git a/bfabric/scripts/bfabric_read_sample_of_order.py b/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py similarity index 100% rename from bfabric/scripts/bfabric_read_sample_of_order.py rename to bfabric/deprecated_scripts/bfabric_read_sample_of_order.py index 3744fc1b..dd3fe5ff 100644 --- a/bfabric/scripts/bfabric_read_sample_of_order.py +++ b/bfabric/deprecated_scripts/bfabric_read_sample_of_order.py @@ -9,6 +9,7 @@ class bfabricEncoder(json.JSONEncoder): """ Implements json encoder for the Bfabric.print_json method """ + def default(self, o): try: return dict(o) @@ -23,4 +24,3 @@ def default(self, o): B = bfabric.Bfabric() - diff --git a/bfabric/scripts/bfabric_sample_graph_traversal.py b/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py similarity index 67% rename from bfabric/scripts/bfabric_sample_graph_traversal.py rename to bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py index 4d8b64ce..12de5240 100755 --- a/bfabric/scripts/bfabric_sample_graph_traversal.py +++ b/bfabric/deprecated_scripts/bfabric_sample_graph_traversal.py @@ -36,7 +36,7 @@ class SampleGraph: # annotation.txt # data structure for keeping annotation.txt infos (de-multiplexed data) containing the tagging - #annotation = {} + # annotation = {} links = {} @@ -44,35 +44,40 @@ def __init__(self, annotation_template): self.annotation_template = annotation_template self.annotation = {} - def read_dataset(self, dataset_id): - ds = self.B.read_object(endpoint="dataset", 
obj={'id': dataset_id})[0] + ds = self.B.read_object(endpoint="dataset", obj={"id": dataset_id})[0] return ds def get_sampleID(self, relativepath): - res = self.B.read_object(endpoint='resource', obj={'relativepath': relativepath})[0] + res = self.B.read_object(endpoint="resource", obj={"relativepath": relativepath})[0] print("\t{} -> {}".format(res.sample._id, res._id)) return res.sample._id - def traverse(self, childSampleId): """ fill up the internal data structure for producing the manifest and annotation.txt files for each exp. """ - res = self.B.read_object(endpoint='sample', obj={'id': childSampleId}) + res = self.B.read_object(endpoint="sample", obj={"id": childSampleId}) childSample = res[0] - if "multiplexid" in childSample: + if "multiplexid" in childSample: # in this special case we reached last level keeping the tag - print ('''\t{} [shape=box label="{}\\n{}"];'''.format(childSample._id, childSample._id, childSample.multiplexid)) + print( + """\t{} [shape=box label="{}\\n{}"];""".format( + childSample._id, childSample._id, childSample.multiplexid + ) + ) try: self.annotation[childSample.multiplexid] = childSample.parent[0]._id except: - print("multiplexid {} for sample {} not in the annotation file template".format(childSample.multiplexid, childSample._id)) - + print( + "multiplexid {} for sample {} not in the annotation file template".format( + childSample.multiplexid, childSample._id + ) + ) - if 'parent' in childSample: + if "parent" in childSample: self.links[childSampleId] = [x._id for x in childSample.parent] for parent in childSample.parent: print("\t{} -> {}".format(parent._id, childSampleId)) @@ -80,9 +85,9 @@ def traverse(self, childSampleId): self.VISITED.append(parent._id) self.L.append(parent._id) - #print("\t# DEBUG = {}".format(len(self.L))) + # print("\t# DEBUG = {}".format(len(self.L))) - while (len(self.L) > 0): + while len(self.L) > 0: u = self.L[0] self.L.remove(u) self.traverse(u) @@ -93,69 +98,71 @@ def run(self, dataset_id): for i in ds.item: for x in i.field: if hasattr(x, "value") and x.attributeposition == attributeposition: - print ("# relativepath = {}".format(x.value)) + print("# relativepath = {}".format(x.value)) sampleID = self.get_sampleID(x.value) - print ("# inputSampleId = {}".format(sampleID)) + print("# inputSampleId = {}".format(sampleID)) self.annotation = self.annotation_template self.traverse(sampleID) experiment = self.links[sampleID] - if len(experiment)==1: + if len(experiment) == 1: self.write_annotation(experiment[0]) self.write_manifest(x.value, experiment[0]) else: - print("# Wrong inputSampleId, please check the sample ID {}, it should be after fractionation".format(sampleID)) + print( + "# Wrong inputSampleId, please check the sample ID {}, it should be after fractionation".format( + sampleID + ) + ) def write_annotation(self, experiment): dirname = str(experiment) if not os.path.isdir(dirname): print("# creating directory {}".format(dirname)) os.makedirs(dirname) - with open("./"+dirname+"/annotation.txt", "w") as f: - w = csv.writer(f, delimiter = '\t') + with open("./" + dirname + "/annotation.txt", "w") as f: + w = csv.writer(f, delimiter="\t") w.writerows(self.annotation.items()) else: pass def write_manifest(self, resource, experiment): filename = "manifest.fp-manifest" - pathtoresource = os.getcwd()+"/"+os.path.basename(resource) + pathtoresource = os.getcwd() + "/" + os.path.basename(resource) if not os.path.exists(filename): - with open (filename, "w") as f: - line = '\t'.join([pathtoresource, str(experiment), "", "", 
"DDA"]) + "\n" + with open(filename, "w") as f: + line = "\t".join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" f.write(line) else: - with open (filename, "a") as f: - line = '\t'.join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" + with open(filename, "a") as f: + line = "\t".join([pathtoresource, str(experiment), "", "", "DDA"]) + "\n" f.write(line) - if __name__ == "__main__": - dataset_id = 44384 #int(sys.argv[1]) - - infile = open(sys.argv[1], 'r') - annotation_template = {} + dataset_id = 44384 # int(sys.argv[1]) + + infile = open(sys.argv[1], "r") + annotation_template = {} for line in infile: line = line.strip() - content = line.split(' ', 1) - annotation_template.update({content[0]:content[1]}) + content = line.split(" ", 1) + annotation_template.update({content[0]: content[1]}) infile.close() # constructor - print ('''digraph G{\n\trankdir="LR";''') + print("""digraph G{\n\trankdir="LR";""") G = SampleGraph(annotation_template) G.run(dataset_id) - #for s in [461042, 461041, 461017]: + # for s in [461042, 461041, 461017]: # G.annotation = G.annotation_template.copy() # G.traverse(s) # G.write_annotation(s) # print("# {}".format(G.annotation)) # print("# {}".format(G.annotation_template)) - #print("# {}".format(G.links)) - - print ('''}''') + # print("# {}".format(G.links)) + print("""}""") """ diff --git a/bfabric/scripts/bfabric_save.py b/bfabric/deprecated_scripts/bfabric_save.py similarity index 100% rename from bfabric/scripts/bfabric_save.py rename to bfabric/deprecated_scripts/bfabric_save.py diff --git a/bfabric/scripts/bfabric_save_customattributes.py b/bfabric/deprecated_scripts/bfabric_save_customattributes.py similarity index 72% rename from bfabric/scripts/bfabric_save_customattributes.py rename to bfabric/deprecated_scripts/bfabric_save_customattributes.py index 064d978f..b60c2409 100755 --- a/bfabric/scripts/bfabric_save_customattributes.py +++ b/bfabric/deprecated_scripts/bfabric_save_customattributes.py @@ -20,8 +20,9 @@ """ bf = bfabric.Bfabric(verbose=False) + def annotate(sampleid=None, name=None, value=None): - res = bf.read_object(endpoint='sample', obj={'id': sampleid}) + res = bf.read_object(endpoint="sample", obj={"id": sampleid}) try: customattribute = res[0].customattribute @@ -33,29 +34,30 @@ def annotate(sampleid=None, name=None, value=None): # there are no customattributes defined yet customattribute = [] - customattribute.append({'name': "{}".format(name), 'value': "{}".format(value)}) - res = bf.save_object(endpoint='sample', - obj={'id': sampleid, 'customattribute': customattribute}) + customattribute.append({"name": "{}".format(name), "value": "{}".format(value)}) + res = bf.save_object(endpoint="sample", obj={"id": sampleid, "customattribute": customattribute}) print(res[0]) -def process(filename = "/Users/cp/Desktop/annotation.csv", tryrun = True): + +def process(filename="/Users/cp/Desktop/annotation.csv", tryrun=True): with open(filename) as csv_file: - csv_reader = csv.reader(csv_file, delimiter=',') + csv_reader = csv.reader(csv_file, delimiter=",") count = 0 for row in csv_reader: if count == 0: colnames = row else: - #print("{}\t{}".format(count, row)) + # print("{}\t{}".format(count, row)) x = re.search(".*_[sS]([0-9]+)_.+", row[0]) if x is not None: print("sampleID={sample}".format(sample=x.group(1))) for idx in range(1, len(row)): - print ("\t{}={}".format(colnames[idx], row[idx])) + print("\t{}={}".format(colnames[idx], row[idx])) if tryrun is False: annotate(sampleid=x.group(1), name=colnames[idx], 
value=row[idx]) count = count + 1 + if __name__ == "__main__": process(tryrun=False) diff --git a/bfabric/scripts/bfabric_save_dataset.py b/bfabric/deprecated_scripts/bfabric_save_dataset.py similarity index 100% rename from bfabric/scripts/bfabric_save_dataset.py rename to bfabric/deprecated_scripts/bfabric_save_dataset.py diff --git a/bfabric/scripts/bfabric_save_importresource.py b/bfabric/deprecated_scripts/bfabric_save_importresource.py similarity index 100% rename from bfabric/scripts/bfabric_save_importresource.py rename to bfabric/deprecated_scripts/bfabric_save_importresource.py diff --git a/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py b/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py new file mode 100755 index 00000000..b1882bae --- /dev/null +++ b/bfabric/deprecated_scripts/bfabric_save_qcloud2_annotation.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# -*- coding: latin1 -*- +import sys +import bfabric +import json + +if __name__ == "__main__": + B = bfabric.Bfabric(verbose=False) + obj = {} + obj["name"] = "qcloud2 annotaion test dataset by CP" + obj["containerid"] = 3000 + obj["attribute"] = [ + {"name": "user_date", "position": 1}, + {"name": "user_email", "position": 2}, + {"name": "additional_information", "position": 3}, + {"name": "problems", "position": 4}, + {"name": "actions", "position": 5}, + ] + obj["item"] = [] + + with open("LUMOS_2.json") as json_file: + d = json.load(json_file) + + for i in range(len(d)): + try: + problems = " | ".join(["{} ({})".format(j["name"], j["qccv"]) for j in d[i]["problems"]]) + except: + problems = "-" + + try: + actions = " | ".join(["{} ({})".format(j["name"], j["qccv"]) for j in d[i]["actions"]]) + except: + actions = "-" + + it = { + "field": [ + {"value": d[i]["user_date"], "attributeposition": 1}, + {"value": d[i]["user_email"], "attributeposition": 2}, + {"value": d[i]["additional_information"], "attributeposition": 3}, + {"value": problems, "attributeposition": 4}, + {"value": actions, "attributeposition": 5}, + ], + "position": i + 1, + } + obj["item"].append(it) + print(obj) + # res = B.save_object(endpoint='dataset', obj=obj) + # print (res[0]) + +""" +curl --location --request GET 'https://api.qcloud2.crg.eu/annotations?start_date=2019-04-01&end_date=2021-10-03&labsystem_name=LUMOS_2' --header "Authorization: Bearer ${ACCESSTOKEN}" > LUMOS_2.json +""" diff --git a/bfabric/scripts/bfabric_save_resource.py b/bfabric/deprecated_scripts/bfabric_save_resource.py similarity index 61% rename from bfabric/scripts/bfabric_save_resource.py rename to bfabric/deprecated_scripts/bfabric_save_resource.py index a7435a31..a1559323 100755 --- a/bfabric/scripts/bfabric_save_resource.py +++ b/bfabric/deprecated_scripts/bfabric_save_resource.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 -''' +""" author: Christian Panse 20200424-1300 @@ -22,7 +22,7 @@ && unzip -l ${resourcefile} \ | ./bfabric_save_resource.py -p 3000 -a 273 -r ${resourcefile} --stdin -''' +""" import sys @@ -37,12 +37,13 @@ BFABRICSTORAGEID = 2 + def save_resource(projectid=None, resourcefile=None, applicationid=None, read_stdin=False): bfapp = Bfabric() description = None - print ("DEBUG {}".format(read_stdin)) + print("DEBUG {}".format(read_stdin)) if read_stdin is True: try: print("reading stdin") @@ -52,60 +53,63 @@ def save_resource(projectid=None, resourcefile=None, applicationid=None, read_st raise try: - md5 = hashlib.md5(open(resourcefile, 'rb').read()).hexdigest() + md5 = hashlib.md5(open(resourcefile, "rb").read()).hexdigest() except: 
print("computing file checksum failed.") raise - resource = bfapp.read_object(endpoint='resource', obj={'filechecksum': md5}) + resource = bfapp.read_object(endpoint="resource", obj={"filechecksum": md5}) - try: + try: print("resource(s) already exist.".format(resource[0]._id)) - resource = bfapp.save_object(endpoint='resource', obj={'id': resource[0]._id, 'description': description}) + resource = bfapp.save_object(endpoint="resource", obj={"id": resource[0]._id, "description": description}) print(resource[0]) return except: pass - try: - workunit = bfapp.save_object(endpoint='workunit', - obj={'name': "{}".format(os.path.basename(resourcefile)), - 'projectid': projectid, - 'applicationid': applicationid}) + workunit = bfapp.save_object( + endpoint="workunit", + obj={ + "name": "{}".format(os.path.basename(resourcefile)), + "projectid": projectid, + "applicationid": applicationid, + }, + ) print(workunit) except: raise - - obj = {'workunitid': workunit[0]._id, - 'filechecksum': md5, - 'relativepath': "{}".format(resourcefile), - 'name': os.path.basename(resourcefile), - 'size': os.path.getsize(resourcefile), - 'status': 'available', - 'description': description, - 'storageid': BFABRICSTORAGEID - } - - - resource = bfapp.save_object(endpoint='resource', obj=obj)[0] + obj = { + "workunitid": workunit[0]._id, + "filechecksum": md5, + "relativepath": "{}".format(resourcefile), + "name": os.path.basename(resourcefile), + "size": os.path.getsize(resourcefile), + "status": "available", + "description": description, + "storageid": BFABRICSTORAGEID, + } + + resource = bfapp.save_object(endpoint="resource", obj=obj)[0] print(resource) - workunit = bfapp.save_object(endpoint='workunit', - obj={'id': workunit[0]._id, 'status': 'available'}) + workunit = bfapp.save_object(endpoint="workunit", obj={"id": workunit[0]._id, "status": "available"}) print(workunit) if __name__ == "__main__": - #resource_file = "/srv/www/htdocs/p3061/Proteomics/Analysis/fragpipe/cpanse_20200424/DS32024.zip" - #save_resource(projectid=3061, resource_file=resource_file, applicationid=274) + # resource_file = "/srv/www/htdocs/p3061/Proteomics/Analysis/fragpipe/cpanse_20200424/DS32024.zip" + # save_resource(projectid=3061, resource_file=resource_file, applicationid=274) (projectid, applicationid, resourefile) = (None, None, None) read_stdin = False try: - opts, args = getopt.getopt(sys.argv[1:],"hp:a:r:", ["help", "projectid=", "applicationid=", "resourcefile=", "stdin"]) + opts, args = getopt.getopt( + sys.argv[1:], "hp:a:r:", ["help", "projectid=", "applicationid=", "resourcefile=", "stdin"] + ) except getopt.GetoptError: usage() sys.exit(2) diff --git a/bfabric/scripts/bfabric_save_resource_description.py b/bfabric/deprecated_scripts/bfabric_save_resource_description.py similarity index 100% rename from bfabric/scripts/bfabric_save_resource_description.py rename to bfabric/deprecated_scripts/bfabric_save_resource_description.py diff --git a/bfabric/scripts/bfabric_submitter_yaml.py b/bfabric/deprecated_scripts/bfabric_submitter_yaml.py similarity index 100% rename from bfabric/scripts/bfabric_submitter_yaml.py rename to bfabric/deprecated_scripts/bfabric_submitter_yaml.py diff --git a/bfabric/scripts/bfabric_upload_wrapper_creator_executable.py b/bfabric/deprecated_scripts/bfabric_upload_wrapper_creator_executable.py similarity index 100% rename from bfabric/scripts/bfabric_upload_wrapper_creator_executable.py rename to bfabric/deprecated_scripts/bfabric_upload_wrapper_creator_executable.py diff --git 
a/bfabric/scripts/demo_config.yaml b/bfabric/deprecated_scripts/demo_config.yaml similarity index 100% rename from bfabric/scripts/demo_config.yaml rename to bfabric/deprecated_scripts/demo_config.yaml diff --git a/bfabric/scripts/fgcz_pd_rpc_client.py b/bfabric/deprecated_scripts/fgcz_pd_rpc_client.py similarity index 100% rename from bfabric/scripts/fgcz_pd_rpc_client.py rename to bfabric/deprecated_scripts/fgcz_pd_rpc_client.py diff --git a/bfabric/scripts/fgcz_pd_rpc_server.py b/bfabric/deprecated_scripts/fgcz_pd_rpc_server.py similarity index 100% rename from bfabric/scripts/fgcz_pd_rpc_server.py rename to bfabric/deprecated_scripts/fgcz_pd_rpc_server.py diff --git a/bfabric/scripts/fgcz_pd_wrapper.py b/bfabric/deprecated_scripts/fgcz_pd_wrapper.py similarity index 100% rename from bfabric/scripts/fgcz_pd_wrapper.py rename to bfabric/deprecated_scripts/fgcz_pd_wrapper.py diff --git a/bfabric/engine/__init__.py b/bfabric/engine/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/engine/engine_suds.py b/bfabric/engine/engine_suds.py new file mode 100644 index 00000000..f01271a5 --- /dev/null +++ b/bfabric/engine/engine_suds.py @@ -0,0 +1,108 @@ +from __future__ import annotations + +import copy +from typing import Any, TYPE_CHECKING + +from suds import MethodNotFound +from suds.client import Client + +from bfabric.engine.response_format_suds import suds_asdict_recursive +from bfabric.errors import BfabricRequestError, get_response_errors +from bfabric.results.result_container import ResultContainer +from bfabric.results.response_format_dict import clean_result + +if TYPE_CHECKING: + from suds.serviceproxy import ServiceProxy + from bfabric.bfabric_config import BfabricAuth + + +class EngineSUDS: + """B-Fabric API SUDS Engine.""" + + def __init__(self, base_url: str, drop_underscores: bool = True) -> None: + self._cl = {} + self._base_url = base_url + self._drop_underscores = drop_underscores + + def read( + self, + endpoint: str, + obj: dict[str, Any], + auth: BfabricAuth, + page: int = 1, + return_id_only: bool = False, + include_deletable_and_updatable_fields: bool = False, + ) -> ResultContainer: + """Reads the requested `obj` from `endpoint`. + :param endpoint: the endpoint to read from, e.g. "sample" + :param obj: a dictionary containing the query, for every field multiple possible values can be provided, the + final query requires the condition for each field to be met + :param auth: the authentication handle of the user performing the request + :param page: the page number to read + :param return_id_only: whether to return only the ids of the objects + :param include_deletable_and_updatable_fields: whether to include the deletable and updatable fields + """ + query = copy.deepcopy(obj) + query["includedeletableupdateable"] = include_deletable_and_updatable_fields + + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=return_id_only) + service = self._get_suds_service(endpoint) + response = service.read(full_query) + return self._convert_results(response=response, endpoint=endpoint) + + def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: + """Saves the provided object to the specified endpoint. + :param endpoint: the endpoint to save to, e.g. 
"sample" + :param obj: the object to save + :param auth: the authentication handle of the user performing the request + """ + query = {"login": auth.login, "password": auth.password, endpoint: obj} + service = self._get_suds_service(endpoint) + try: + response = service.save(query) + except MethodNotFound as e: + raise BfabricRequestError(f"SUDS failed to find save method for the {endpoint} endpoint.") from e + return self._convert_results(response=response, endpoint=endpoint) + + def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth) -> ResultContainer: + """Deletes the object with the specified ID from the specified endpoint. + :param endpoint: the endpoint to delete from, e.g. "sample" + :param id: the ID of the object to delete + :param auth: the authentication handle of the user performing the request + """ + if isinstance(id, list) and len(id) == 0: + print("Warning, attempted to delete an empty list, ignoring") + # TODO maybe use error here (and make sure it's consistent) + return ResultContainer([], total_pages_api=0) + + query = {"login": auth.login, "password": auth.password, "id": id} + service = self._get_suds_service(endpoint) + response = service.delete(query) + return self._convert_results(response=response, endpoint=endpoint) + + def _get_suds_service(self, endpoint: str) -> ServiceProxy: + """Returns a SUDS service for the given endpoint. Reuses existing instances when possible.""" + if endpoint not in self._cl: + wsdl = "".join((self._base_url, "/", endpoint, "?wsdl")) + self._cl[endpoint] = Client(wsdl, cache=None) + return self._cl[endpoint].service + + def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: + try: + n_available_pages = response["numberofpages"] + except AttributeError: + n_available_pages = 0 + errors = get_response_errors(response, endpoint=endpoint) + if not hasattr(response, endpoint): + return ResultContainer([], total_pages_api=0, errors=errors) + # TODO up until here it's duplicated with engine_zeep + results = [] + for result in response[endpoint]: + result_parsed = suds_asdict_recursive(result, convert_types=True) + result_parsed = clean_result( + result_parsed, + drop_underscores_suds=self._drop_underscores, + sort_keys=True, + ) + results += [result_parsed] + return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors) diff --git a/bfabric/engine/engine_zeep.py b/bfabric/engine/engine_zeep.py new file mode 100644 index 00000000..b91c120c --- /dev/null +++ b/bfabric/engine/engine_zeep.py @@ -0,0 +1,155 @@ +from __future__ import annotations +import copy +from typing import Any, TYPE_CHECKING + +import zeep +from zeep.helpers import serialize_object + +from bfabric.errors import BfabricRequestError, get_response_errors +from bfabric.results.result_container import ResultContainer +from bfabric.results.response_format_dict import clean_result + +if TYPE_CHECKING: + from bfabric.bfabric_config import BfabricAuth + + +class EngineZeep: + """B-Fabric API Zeep Engine""" + + def __init__(self, base_url: str) -> None: + self._cl = {} + self._base_url = base_url + + def read( + self, + endpoint: str, + obj: dict, + auth: BfabricAuth, + page: int = 1, + return_id_only: bool = False, + include_deletable_and_updatable_fields: bool = False, + ) -> ResultContainer: + """Reads the requested `obj` from `endpoint`. + :param endpoint: the endpoint to read from, e.g. 
"sample" + :param obj: a dictionary containing the query, for every field multiple possible values can be provided, the + final query requires the condition for each field to be met + :param auth: the authentication handle of the user performing the request + :param page: the page number to read + :param return_id_only: whether to return only the ids of the objects + :param include_deletable_and_updatable_fields: whether to include the deletable and updatable fields + """ + query = copy.deepcopy(obj) + query["includedeletableupdateable"] = include_deletable_and_updatable_fields + + # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not + if endpoint == "sample": + excl_keys = [ + "includefamily", + "includeassociations", + "includeplates", + "includeresources", + "includeruns", + "includechildren", + "includeparents", + "includereplacements", + ] + _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) + + full_query = dict(login=auth.login, page=page, password=auth.password, query=query, idonly=return_id_only) + + client = self._get_client(endpoint) + with client.settings(strict=False, xml_huge_tree=True, xsd_ignore_sequence_order=True): + response = client.service.read(full_query) + return self._convert_results(response=response, endpoint=endpoint) + + def save(self, endpoint: str, obj: dict, auth: BfabricAuth) -> ResultContainer: + """Saves the provided object to the specified endpoint. + :param endpoint: the endpoint to save to, e.g. "sample" + :param obj: the object to save + :param auth: the authentication handle of the user performing the request + """ + query = copy.deepcopy(obj) + + # FIXME: Hacks for the cases where Zeep thinks a parameter is compulsory and it is actually not + if endpoint == "resource": + excl_keys = ["name", "sampleid", "storageid", "workunitid", "relativepath"] + _zeep_query_append_skipped(query, excl_keys, inplace=True, overwrite=False) + + full_query = {"login": auth.login, "password": auth.password, endpoint: query} + + client = self._get_client(endpoint) + + try: + with client.settings(strict=False): + response = client.service.save(full_query) + except AttributeError as e: + if e.args[0] == "Service has no operation 'save'": + raise BfabricRequestError(f"ZEEP failed to find save method for the {endpoint} endpoint.") from e + raise e + return self._convert_results(response=response, endpoint=endpoint) + + def delete(self, endpoint: str, id: int | list[int], auth: BfabricAuth) -> ResultContainer: + """Deletes the object with the specified ID from the specified endpoint. + :param endpoint: the endpoint to delete from, e.g. 
"sample" + :param id: the ID of the object to delete + :param auth: the authentication handle of the user performing the request + """ + if isinstance(id, list) and len(id) == 0: + print("Warning, attempted to delete an empty list, ignoring") + # TODO maybe use error here (and make sure it's consistent) + return ResultContainer([], total_pages_api=0) + + query = {"login": auth.login, "password": auth.password, "id": id} + + client = self._get_client(endpoint) + response = client.service.delete(query) + return self._convert_results(response=response, endpoint=endpoint) + + def _get_client(self, endpoint: str) -> zeep.Client: + if endpoint not in self._cl: + wsdl = "".join((self._base_url, "/", endpoint, "?wsdl")) + self._cl[endpoint] = zeep.Client(wsdl) + return self._cl[endpoint] + + def _convert_results(self, response: Any, endpoint: str) -> ResultContainer: + try: + n_available_pages = response["numberofpages"] + except AttributeError: + n_available_pages = 0 + errors = get_response_errors(response, endpoint=endpoint) + if not hasattr(response, endpoint): + return ResultContainer([], total_pages_api=0, errors=errors) + # TODO up until here it's duplicated with engine_suds + results = [] + for result in response[endpoint]: + results_parsed = dict(serialize_object(result, target_cls=dict)) + results_parsed = clean_result( + results_parsed, + drop_underscores_suds=False, # NOTE: Underscore problem specific to SUDS + sort_keys=True, + ) + results += [results_parsed] + return ResultContainer(results=results, total_pages_api=n_available_pages, errors=errors) + + +# TODO: The reason why Zeep requires to explicitly skip certain values remains unclear +# To the best of our current understanding, the fields are actually optional, but because of some differences in +# formatting they appear to zeep as compulsory. The current solution is envisioned by developers of Zeep, but +# it is a hack, and should ideally be handled internally by Zeep. +# If developers of Zeep ever resume its maintenance, it would make sense to revisit +def _zeep_query_append_skipped(query: dict, skipped_keys: list, inplace: bool = False, overwrite: bool = False) -> dict: + """ + This function is used to fix a buggy behaviour of Zeep/BFabric. Specifically, Zeep does not return correct + query results if some of the optional parameters are not mentioned in the query. + + :param query: Original query + :param skipped_keys: Optional keys to skip + :param inplace: Whether to change the argument, or make a new copy to return + :param overwrite: Whether to overwrite the key if it is already present in the query + :return: Adds optional keys to query as skipped values. + """ + query_this = copy.deepcopy(query) if not inplace else query + for key in skipped_keys: + if overwrite or (key not in query_this): + query_this[key] = zeep.xsd.SkipValue + return query_this diff --git a/bfabric/engine/response_format_suds.py b/bfabric/engine/response_format_suds.py new file mode 100644 index 00000000..aaf64d54 --- /dev/null +++ b/bfabric/engine/response_format_suds.py @@ -0,0 +1,40 @@ +from __future__ import annotations +from typing import Any +from suds.sax.text import Text +from suds.sudsobject import asdict + + +def convert_suds_type(item: Any) -> int | str: + """ + Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which + is currently ever return, namely 'Text'. Integers and doubles are already cast to their python equivalents and + thus do not need to be explicitly handled. 
diff --git a/bfabric/engine/response_format_suds.py b/bfabric/engine/response_format_suds.py new file mode 100644 index 00000000..aaf64d54 --- /dev/null +++ b/bfabric/engine/response_format_suds.py @@ -0,0 +1,40 @@ +from __future__ import annotations +from typing import Any +from suds.sax.text import Text +from suds.sudsobject import asdict + + +def convert_suds_type(item: Any) -> int | str: + """ + Converts the suds type to an equivalent python type. There is, to my knowledge, only a single suds type which + is currently ever returned, namely 'Text'. Integers and doubles are already cast to their python equivalents and + thus do not need to be explicitly handled. This may be subject to change in future versions. + :param item: The suds item + :return: The item as a built-in python type + """ + if isinstance(item, Text): + return str(item) + return item + + +def suds_asdict_recursive(d, convert_types: bool = False) -> dict: + """Convert Suds object into serializable format. + https://stackoverflow.com/a/15678861 + :param d: The input suds object + :param convert_types: A boolean to determine if the simple suds types returned should be cast to python types + :return: The suds object converted to a dict + """ + out = {} + for k, v in asdict(d).items(): + if hasattr(v, "__keylist__"): + out[k] = suds_asdict_recursive(v, convert_types=convert_types) + elif isinstance(v, list): + out[k] = [] + for item in v: + if hasattr(item, "__keylist__"): + out[k].append(suds_asdict_recursive(item, convert_types=convert_types)) + else: + out[k].append(convert_suds_type(item) if convert_types else item) + else: + out[k] = convert_suds_type(v) if convert_types else v + return out diff --git a/bfabric/errors.py b/bfabric/errors.py new file mode 100644 index 00000000..28545748 --- /dev/null +++ b/bfabric/errors.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from typing import Any + + +class BfabricRequestError(Exception): + """An error that is returned by the server in response to a full request.""" + + def __init__(self, message: str) -> None: + self.message = message + + def __repr__(self) -> str: + return f"RequestError(message={repr(self.message)})" + + +class BfabricConfigError(RuntimeError): + """An error that is raised when the configuration is invalid.""" + + pass + + +# TODO: Also test for response-level errors +def get_response_errors(response: Any, endpoint: str) -> list[BfabricRequestError]: + """ + :param response: A raw response to a query from an underlying engine + :param endpoint: The target endpoint + :return: A list of errors for each query result, if that result failed + Thus, a successful query would result in an empty list + """ + if getattr(response, "errorreport", None): + return [BfabricRequestError(response.errorreport)] + elif endpoint in response: + return [BfabricRequestError(r.errorreport) for r in response[endpoint] if getattr(r, "errorreport", None)] + else: + return []
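A quick usage sketch for the error helper above (hypothetical variables: `response` stands for a raw SUDS/Zeep response object as passed around by the engines):

    errors = get_response_errors(response, endpoint="sample")
    if errors:
        # each entry is a BfabricRequestError wrapping a server-side error report
        raise errors[0]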
+""" + + +def report_test_result(rez: bool, prefix: str) -> None: + if rez: + print("--", prefix, "test passed --") + else: + print("--", prefix, "test failed --") + + +def _calc_query(config, auth, engine, endpoint): + print("Sending query via", engine) + b = Bfabric(config, auth, engine=engine) + + response_class = b.read(endpoint, {}, max_results=300, return_id_only=False, includedeletableupdateable=True) + response_dict = response_class.to_list_dict(drop_empty=True, have_sort_responses=True) + return list_dict_to_df(response_dict) + + +def _set_partition_test(a, b) -> bool: + aSet = set(a) + bSet = set(b) + shared = aSet.intersection(bSet) + unique1 = aSet - bSet + unique2 = bSet - aSet + + print("Shared:", shared) + print("Unique(1):", unique1) + print("Unique(2):", unique2) + + # Test passes if there are no entities unique to only one of the sets + return (len(unique1) == 0) and (len(unique2) == 0) + + +def dataframe_pagination_test(config, auth, endpoint, use_cached: bool = False, store_cached: bool = True): + pwd_zeep = "tmp_zeep_" + endpoint + ".csv" + pwd_suds = "tmp_suds_" + endpoint + ".csv" + + if use_cached and os.path.isfile(pwd_zeep) and os.path.isfile(pwd_suds): + print("Reading cached dataframes for", endpoint) + resp_df_suds = pd.read_csv(pwd_zeep) + resp_df_zeep = pd.read_csv(pwd_suds) + else: + print("Running queries from scratch for", endpoint) + resp_df_suds = _calc_query(config, auth, BfabricAPIEngineType.SUDS, endpoint) + resp_df_zeep = _calc_query(config, auth, BfabricAPIEngineType.ZEEP, endpoint) + if store_cached: + resp_df_suds.to_csv(pwd_zeep) + resp_df_zeep.to_csv(pwd_suds) + + # Rename suds to remove underscores + resp_df_suds.rename(columns={"_id": "id", "_classname": "classname"}, inplace=True) + + suds_cols = list(sorted(resp_df_suds.columns)) + zeep_cols = list(sorted(resp_df_zeep.columns)) + + # Report + set_test_result = _set_partition_test(suds_cols, zeep_cols) + report_test_result(set_test_result, "set") + if not set_test_result: + return False + + match_test_result = True + for col_name in suds_cols: + if not resp_df_suds[col_name].equals(resp_df_zeep[col_name]): + print("------- Mismatch in: ", col_name, "-------") + print("Suds", list(resp_df_suds[col_name])) + print("Zeep", list(resp_df_zeep[col_name])) + match_test_result = False + + return match_test_result + + +config, auth = get_system_auth(config_env="TEST") + +result = dataframe_pagination_test(config, auth, "user", use_cached=False, store_cached=True) +report_test_result(result, "pagination") diff --git a/bfabric/examples/compare_zeep_suds_query.py b/bfabric/examples/compare_zeep_suds_query.py new file mode 100644 index 00000000..28d7f58c --- /dev/null +++ b/bfabric/examples/compare_zeep_suds_query.py @@ -0,0 +1,200 @@ +from collections import OrderedDict +from contextlib import redirect_stdout +from copy import deepcopy + +import suds +import zeep + +from bfabric import BfabricAuth, BfabricConfig +from bfabric.bfabric import get_system_auth +from bfabric.results.response_format_dict import drop_empty_elements, map_element_keys +from bfabric.engine.response_format_suds import suds_asdict_recursive + +""" +This file is intended to eventually become a test to compare that Zeep and SUDS produce +the same or at least comparable output for the same requests. Important features +* Test if raw XML matches +* Test if parsed response (asdict) matches +For both, it is important to test +* different endpoints (user, run, ...) +* single match queries (e.g. {id=5}) vs multi match queries (e.g. 
{id=5}) vs multi match queries (e.g. {}) + +Observations: +* SUDS produces underscores in front of 'id', 'projectid' and 'classname'. Reasons currently unknown, may also affect + other keywords. Currently, we remove underscores by explicitly providing the keywords to purge +* ZEEP does not match XML + - Zeep generates additional keywords not present in XML, all of them have values None or empty list + - Zeep misses some important keywords like 'id' and 'projectid' inside of nested XML, such as user->project. This + behaviour is inconsistent, and only affects a fraction of users. +""" + + +def read_zeep(wsdl, fullQuery, raw=True): + client = zeep.Client(wsdl) + with client.settings(strict=False, raw_response=raw): + ret = client.service.read(fullQuery) + if raw: + return ret.content + else: + return dict(zeep.helpers.serialize_object(ret, target_cls=dict)) + + +def read_suds(wsdl, fullQuery, raw=True): + client = suds.client.Client(wsdl, cache=None, retxml=raw) + ret = client.service.read(fullQuery) + if raw: + return ret + else: + return suds_asdict_recursive(ret, convert_types=True) + + +def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool = False) -> dict: + thisQuery = deepcopy(query) + thisQuery["includedeletableupdateable"] = includedeletableupdateable + + return {"login": auth.login, "password": auth.password, "query": thisQuery} + + +def calc_both(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True): + wsdl = "".join((config.base_url, "/", endpoint, "?wsdl")) + fullQuery = full_query(auth, query) + retZeep = read_zeep(wsdl, fullQuery, raw=raw) + retSuds = read_suds(wsdl, fullQuery, raw=raw) + return retZeep, retSuds + + +###################### +# Raw XML tests +###################### + + +def raw_test(auth: BfabricAuth, config: BfabricConfig, endpoint, query) -> None: + print("Testing raw XML match for", endpoint, query) + retZeep, retSuds = calc_both(auth, config, endpoint, query, raw=True) + assert len(retZeep) == len(retSuds) + assert retZeep == retSuds + print("-- passed --") + + +config, auth = get_system_auth(config_env="TEST") +# raw_test(auth, config, 'user', {'id': 9026}) +# raw_test(auth, config, 'user', {}) + +# root = etree.fromstring(retZeep) +# print(etree.tostring(root, pretty_print=True).decode()) +# pprint(retZeepDict['user'][0]['order']) + + +###################### +# Parsed dict comparison +###################### + + +# Find the set of all basic types used in the nested container (made of dicts, ordered dicts and lists) +def recursive_get_types(generic_container) -> set: + if isinstance(generic_container, (dict, OrderedDict)): + type_sets_lst = [recursive_get_types(v) for k, v in generic_container.items()] + return set().union(*type_sets_lst) + elif isinstance(generic_container, list): + type_sets_lst = [recursive_get_types(el) for el in generic_container] + return set().union(*type_sets_lst) + else: + return {type(generic_container)} + + +def basic_data_type_match_test(auth, config, endpoint, query) -> None: + print("Testing data types for", endpoint, query) + retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False) + typesZeep = recursive_get_types(retZeepDict) + typesSuds = recursive_get_types(retSudsDict) + print("Zeep", typesZeep) + print("Suds", typesSuds) + + +# basic_data_type_match_test(auth, config, 'user', {'id': 9026}) +# basic_data_type_match_test(auth, config, 'user', {}) + + +# Compare two dictionaries/lists recursively. 
+def recursive_comparison(generic_container1, generic_container2, prefix: list) -> bool:
+    matched = True
+
+    if type(generic_container1) != type(generic_container2):
+        print(prefix, "type", type(generic_container1), "!=", type(generic_container2))
+        return False
+    if isinstance(generic_container1, dict):
+        allKeys = set(list(generic_container1.keys()) + list(generic_container2.keys()))
+        for k in allKeys:
+            if k not in generic_container1:
+                print(prefix, "Not in 1: ", k, "=", generic_container2[k])
+                print("- 1:", generic_container1)
+                print("- 2:", generic_container2)
+                matched = False
+            elif k not in generic_container2:
+                print(prefix, "Not in 2: ", k, "=", generic_container1[k])
+                matched = False
+            else:
+                matched_recursive = recursive_comparison(generic_container1[k], generic_container2[k], prefix + [k])
+                matched = matched and matched_recursive
+    elif isinstance(generic_container1, list):
+        if len(generic_container1) != len(generic_container2):
+            print(prefix, "length", len(generic_container1), "!=", len(generic_container2))
+            matched = False
+        else:
+            for i, (el1, el2) in enumerate(zip(generic_container1, generic_container2)):
+                matched_recursive = recursive_comparison(el1, el2, prefix + [i])
+                matched = matched and matched_recursive
+    else:
+        if generic_container1 != generic_container2:
+            print(prefix, "value", generic_container1, "!=", generic_container2)
+            matched = False
+
+    return matched
+
+
+def parsed_data_match_test(
+    auth,
+    config,
+    endpoint,
+    query,
+    drop_empty: bool = True,
+    drop_underscores_suds: bool = True,
+    log_file_path: str = None,
+) -> None:
+    print("Testing parsed data match for", endpoint, query)
+    retZeepDict, retSudsDict = calc_both(auth, config, endpoint, query, raw=False)
+
+    if drop_empty:
+        drop_empty_elements(retZeepDict, inplace=True)
+        drop_empty_elements(retSudsDict, inplace=True)
+
+    if drop_underscores_suds:
+        map_element_keys(retSudsDict, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True)
+
+    if log_file_path is not None:
+        with open(log_file_path, "w") as f, redirect_stdout(f):
+            matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[])
+    else:
+        matched = recursive_comparison(retZeepDict, retSudsDict, prefix=[])
+
+    if matched:
+        print("-- passed --")
+    else:
+        print("-- failed --")
+
+
+parsed_data_match_test(
+    auth, config, "user", {"id": 9026}, drop_empty=True, drop_underscores_suds=True, log_file_path=None
+)
+
+# parsed_data_match_test(auth, config, 'user', {}, drop_empty=True, drop_underscores_suds=True,
+#                        log_file_path=None)
+
+# parsed_data_match_test(auth, config, 'run', {}, drop_empty=True, drop_underscores_suds=True,
+#                        log_file_path=None)
+
+# print("Zeep", retZeep['user'][0]['project'][0])
+# print("Suds", retSuds['user'][0]['project'][0])
+
+# print("Zeep", retZeep['user'][0])
+# print("Suds", retSuds['user'][0])
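To make the comparison semantics of recursive_comparison concrete, a quick illustration on hypothetical values (not B-Fabric data):

    # One nested value differs between the two containers.
    ok = recursive_comparison({"a": [1, 2]}, {"a": [1, 3]}, prefix=[])
    # prints: ['a', 1] value 2 != 3 -- and returns False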
diff --git a/bfabric/examples/exists_multi.py b/bfabric/examples/exists_multi.py
new file mode 100644
index 00000000..0e322350
--- /dev/null
+++ b/bfabric/examples/exists_multi.py
@@ -0,0 +1,34 @@
+from bfabric import BfabricAPIEngineType, Bfabric
+from bfabric.bfabric import get_system_auth
+from bfabric.experimental.multi_query import MultiQuery
+
+
+config, auth = get_system_auth(config_env="TEST")
+
+b1 = MultiQuery(Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS))
+b2 = MultiQuery(Bfabric(config, auth, engine=BfabricAPIEngineType.ZEEP))
+
+
+###################
+# Testing IDs
+###################
+
+# target_user_ids = [1, 2, 3, 4, 5, 12345]
+#
+# response1 = b1.exists_multi("user", "id", target_user_ids)
+# response2 = b2.exists_multi("user", "id", target_user_ids)
+#
+# print(response1)
+# print(response2)
+
+###################
+# Testing Names
+###################
+
+target_workunit_names = ["tomcat", "tomcat2"]
+
+response1 = b1.exists_multi("workunit", "name", target_workunit_names)
+response2 = b2.exists_multi("workunit", "name", target_workunit_names)
+
+print(response1)
+print(response2)
diff --git a/bfabric/examples/zeep_debug.py b/bfabric/examples/zeep_debug.py
new file mode 100644
index 00000000..33ebdcab
--- /dev/null
+++ b/bfabric/examples/zeep_debug.py
@@ -0,0 +1,66 @@
+from bfabric import BfabricAuth, BfabricConfig
+from bfabric.bfabric import get_system_auth
+import zeep
+from copy import deepcopy
+from lxml import etree
+
+"""
+Attempt to understand why Zeep does not correctly parse XML
+* Problem 1: (minor) Zeep generates additional Null fields not present in the XML, but likely (hypothetically)
+  available in the XSD.
+* Problem 2: (major) Zeep fails to parse parameters for some users.
+
+Effort:
+[+] helpers.serialize_object is NOT the culprit; the parsed XML response is already missing the values.
+[-] Manipulating client.settings does not seem to affect the output.
+
+Intermediate conclusions:
+* Both behaviours are most likely internal bugs of Zeep. Unfortunately, the developer does not respond to issues
+  at the moment.
+"""
+
+
+def full_query(auth: BfabricAuth, query: dict, includedeletableupdateable: bool = False) -> dict:
+    thisQuery = deepcopy(query)
+    thisQuery["includedeletableupdateable"] = includedeletableupdateable
+
+    return {"login": auth.login, "password": auth.password, "query": thisQuery}
+
+
+def read_zeep(wsdl, fullQuery, raw=True):
+    client = zeep.Client(wsdl)
+    with client.settings(strict=False, raw_response=raw):
+        ret = client.service.read(fullQuery)
+    if raw:
+        return ret.content
+    else:
+        return ret
+
+
+def read(auth: BfabricAuth, config: BfabricConfig, endpoint: str, query: dict, raw: bool = True):
+    wsdl = "".join((config.base_url, "/", endpoint, "?wsdl"))
+    fullQuery = full_query(auth, query)
+    return read_zeep(wsdl, fullQuery, raw=raw)
+
+
+bfconfig, bfauth = get_system_auth(config_env="TEST")
+
+print("============== RAW ================")
+
+rez = read(bfauth, bfconfig, "user", {"id": 9026}, raw=True)
+root = etree.fromstring(rez)
+print(etree.tostring(root, pretty_print=True).decode())
+
+rez = read(bfauth, bfconfig, "user", {"id": 9026}, raw=False)
+
+print("============== ORIG ================")
+print(rez["user"][0]["project"])
+print(rez["user"][0]["project"]["id"])
+
+# trg = rez['project']
+#
+# print('============== ORIG ================')
+# print(trg)
+#
+#
+# print('============== SERIAL ================')
+#
+# print(zeep.helpers.serialize_object(trg, target_cls=dict))
diff --git a/bfabric/experimental/__init__.py b/bfabric/experimental/__init__.py
new file mode 100644
index 00000000..1ad6df74
--- /dev/null
+++ b/bfabric/experimental/__init__.py
@@ -0,0 +1,3 @@
+from .multi_query import MultiQuery
+
+__all__ = ["MultiQuery"]
diff --git a/bfabric/experimental/multi_query.py b/bfabric/experimental/multi_query.py
new file mode 100644
index 00000000..13ef6638
--- /dev/null
+++ b/bfabric/experimental/multi_query.py
@@ -0,0 +1,119 @@
+from __future__ import annotations
+
+from copy import deepcopy
+
+from bfabric.results.result_container import ResultContainer
+from bfabric.utils.paginator import page_iter
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from bfabric.bfabric import Bfabric
+
+
+class MultiQuery:
+    """Some advanced functionality that supports paginating over a list of conditions that is larger than the
+    100-conditions limit of the API.
+    This functionality might eventually be merged into the main Bfabric class but will probably be subject to some
+    breaking changes and is not as thoroughly tested as the main class's functionality.
+    """
+
+    def __init__(self, client: Bfabric) -> None:
+        self._client = client
+
+    # TODO: Is this scope sufficient? Is there ever more than one multi-query parameter, and/or not at the root of dict?
+    def read_multi(
+        self,
+        endpoint: str,
+        obj: dict,
+        multi_query_key: str,
+        multi_query_vals: list,
+        return_id_only: bool = False,
+    ) -> ResultContainer:
+        """Performs a 1-parameter multi-query (there is 1 parameter that takes a list of values).
+        Since the API only allows BFABRIC_QUERY_LIMIT queries per page, the list is split into chunks before querying.
+        :param endpoint: endpoint
+        :param obj: query dictionary
+        :param multi_query_key: key for which the multi-query is performed
+        :param multi_query_vals: list of values for which the multi-query is performed
+        :param return_id_only: whether to return only the ids of the objects
+        :return: List of responses, packaged in the results container
+
+        NOTE: It is assumed that there is only 1 response for each value.
+        """
+        # TODO add `check` parameter
+        response_tot = ResultContainer([], total_pages_api=0)
+        obj_extended = deepcopy(obj)  # Make a copy of the query, not to make edits to the argument
+
+        # Iterate over request chunks that fit into a single API page
+        for page_vals in page_iter(multi_query_vals):
+            obj_extended[multi_query_key] = page_vals
+
+            # TODO: Test what happens if there are multiple responses to each of the individual queries.
+            #   * What would happen?
+            #   * What would happen if the total number of responses would exceed 100 now?
+            #   * What would happen if we naively made a multi-query with more than 100 values? Would the API
+            #     paginate automatically? If yes, perhaps we don't need this method at all?
+            # TODO: It is assumed that a user requesting a multi-query always wants all of the pages. Can anybody
+            #   think of exceptions to this?
+            response_this = self._client.read(endpoint, obj_extended, max_results=None, return_id_only=return_id_only)
+            response_tot.extend(response_this, reset_total_pages_api=True)
+
+        return response_tot
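The chunking behaviour of read_multi is easiest to see end to end; a minimal usage sketch, assuming a TEST configuration as in the examples above (the endpoint and id range are illustrative):

    from bfabric import Bfabric, BfabricAPIEngineType
    from bfabric.bfabric import get_system_auth
    from bfabric.experimental.multi_query import MultiQuery

    config, auth = get_system_auth(config_env="TEST")
    mq = MultiQuery(Bfabric(config, auth, engine=BfabricAPIEngineType.SUDS))
    # 250 ids: page_iter splits them into chunks of at most BFABRIC_QUERY_LIMIT (100)
    # values, so this issues three reads and merges them into one ResultContainer.
    results = mq.read_multi("sample", {}, "id", list(range(1, 251)), return_id_only=True)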
+    # NOTE: A save-multi method is likely useless. When saving multiple objects, they all have different fields.
+    #   One option would be to provide a dataframe, but it might struggle with nested dicts.
+    #   The likely best solution is to not provide this method, and let users run a for-loop themselves.
+    # def save_multi(self, endpoint: str, obj_lst: list, **kwargs) -> ResultContainer:
+    #     response_tot = ResultContainer([], total_pages_api=0)
+    #
+    #     # Iterate over request chunks that fit into a single API page
+    #     for page_objs in page_iter(obj_lst):
+    #         response_page = self.save(endpoint, page_objs, **kwargs)
+    #         response_tot.extend(response_page, reset_total_pages_api=True)
+    #
+    #     return response_tot
+
+    def delete_multi(self, endpoint: str, id_list: list[int]) -> ResultContainer:
+        """Deletes multiple objects from `endpoint` by their ids."""
+        # TODO document and test error handling
+        # TODO add `check` parameter
+        response_tot = ResultContainer([], total_pages_api=0)
+
+        if not id_list:
+            print("Warning, empty list provided for deletion, ignoring")
+            return response_tot
+
+        # Iterate over request chunks that fit into a single API page
+        for page_ids in page_iter(id_list):
+            response_page = self._client.delete(endpoint, page_ids)
+            response_tot.extend(response_page, reset_total_pages_api=True)
+
+        return response_tot
+
+    def exists_multi(self, endpoint: str, key: str, value: list[int | str] | int | str) -> bool | list[bool]:
+        """For each value, tests whether an entry with `key` set to that value is found in the API.
+        :param endpoint: endpoint
+        :param key: a key for the query (e.g. id or name)
+        :param value: a value or a list of values
+        :return: a single bool, or a list with one bool per value
+        """
+        is_scalar = isinstance(value, (int, str))
+        if is_scalar:
+            return self._client.exists(endpoint=endpoint, key=key, value=value, check=True)
+        elif not isinstance(value, list):
+            raise ValueError("Unexpected data type", type(value))
+
+        # 1. Read data for these values
+        results = self.read_multi(endpoint, {}, key, value)
+
+        # 2. Extract all the values for which there was a response
+        result_vals = []
+        for r in results.results:
+            if key in r:
+                result_vals += [r[key]]
+            elif "_" + key in r:  # TODO: Remove this if the SUDS bug is ever resolved
+                result_vals += [r["_" + key]]
+
+        # 3. For each of the requested values, return true if there was a response and false if there was not
+        return [val in result_vals for val in value]
diff --git a/bfabric/results/__init__.py b/bfabric/results/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/results/pandas_helper.py b/bfabric/results/pandas_helper.py
new file mode 100644
index 00000000..beff67e9
--- /dev/null
+++ b/bfabric/results/pandas_helper.py
@@ -0,0 +1,36 @@
+from __future__ import annotations
+import pandas as pd
+from typing import Any
+
+
+def _stringify(a: Any) -> Any:
+    """
+    :param a: Any variable
+    :return: Stringified variable
+
+    Converts the variable to a string if it is of a non-basic data type, otherwise keeps it as it is.
+    TODO: Make a better separation between what is and what is not a basic data type
+    """
+    if isinstance(a, (list, dict, tuple)):
+        return str(a)
+    else:
+        return a
+
+
+def _stringify_dict(d: dict) -> dict:
+    """
+    :param d: A dictionary
+    :return: Same dictionary, with all values stringified if necessary
+    """
+    return {k: _stringify(v) for k, v in d.items()}
+
+
+def list_dict_to_df(l: list[dict]) -> pd.DataFrame:
+    """
+    :param l: A list of dictionaries
+    :return: Pandas dataframe, where every list element is a new row
+
+    * Columns are a union of all keys that appear in the dictionaries. Any missing key is treated as a NaN.
+    * All non-basic data types are converted to strings.
+    """
+    return pd.DataFrame([_stringify_dict(r) for r in l])
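To illustrate the two bullet points above, a small hypothetical input:

    from bfabric.results.pandas_helper import list_dict_to_df

    df = list_dict_to_df([{"id": 1, "tags": ["a"]}, {"id": 2, "name": "x"}])
    # df has the columns id, tags, name; the list value is stringified to "['a']",
    # and the missing entries (tags of row 1, name of row 0) become NaN.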
diff --git a/bfabric/results/response_format_dict.py b/bfabric/results/response_format_dict.py
new file mode 100644
index 00000000..fb7e0cc5
--- /dev/null
+++ b/bfabric/results/response_format_dict.py
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from copy import deepcopy
+
+
+def sort_dict(d: dict) -> dict:
+    """Returns a copy of the dictionary with items sorted by key.
+    Affects how the dictionary appears when mapped to a string.
+    :param d: A dictionary
+    :return: A dictionary with items sorted by key.
+    """
+    return dict(sorted(d.items()))
+
+
+def _recursive_drop_empty(response_elem: list | dict) -> None:
+    """
+    Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered that is
+    either an empty list or None, the key-value pair gets deleted from the dictionary.
+    :param response_elem: One of the sub-objects in the hierarchical storage of the response. Initially the root
+    :return: Nothing
+    """
+    if isinstance(response_elem, list):
+        for el in response_elem:
+            _recursive_drop_empty(el)
+    elif isinstance(response_elem, dict):
+        keys_to_delete = []  # NOTE: Avoid deleting keys inside the iterator, it may break the iterator
+        for k, v in response_elem.items():
+            if (v is None) or (isinstance(v, list) and len(v) == 0):
+                keys_to_delete += [k]
+            else:
+                _recursive_drop_empty(v)
+        for k in keys_to_delete:
+            del response_elem[k]
+
+
+def drop_empty_elements(response: list | dict, inplace: bool = True) -> list | dict | None:
+    """
+    Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary value is encountered that is
+    either an empty list or None, the key-value pair gets deleted from the dictionary.
+    :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str)
+    :param inplace: If true, will return nothing and edit the argument. Otherwise, will preserve the argument
+        and return an edited copy
+    :return: Nothing, or an edited response, depending on `inplace`
+    """
+    response_filtered = deepcopy(response) if not inplace else response
+    _recursive_drop_empty(response_filtered)
+    return response_filtered
+
+
+def _recursive_map_keys(response_elem: list | dict, keymap: dict) -> None:
+    """
+    Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which
+    a mapping is requested, the key is renamed to the corresponding mapped one.
+    :param response_elem: One of the sub-objects in the hierarchical storage of the response. Initially the root
+    :param keymap: A map containing the key names that should be renamed, and as values the new names.
+    :return: Nothing
+    """
+    if isinstance(response_elem, list):
+        for el in response_elem:
+            _recursive_map_keys(el, keymap)
+    elif isinstance(response_elem, dict):
+        keys_to_delete = []  # NOTE: Avoid deleting keys inside the iterator, it may break the iterator
+        for k, v in response_elem.items():
+            _recursive_map_keys(v, keymap)
+            if k in keymap:
+                keys_to_delete += [k]
+
+        for k in keys_to_delete:
+            response_elem[keymap[k]] = response_elem[k]  # Copy old value to the new key
+            del response_elem[k]  # Delete old key
+
+
+def map_element_keys(response: list | dict, keymap: dict, inplace: bool = True) -> list | dict:
+    """
+    Iterates over all nested lists, dictionaries and basic values. Whenever a dictionary key is found for which
+    a mapping is requested, the key is renamed to the corresponding mapped one.
+    :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str)
+    :param keymap: A map containing the key names that should be renamed, and as values the new names.
+    :param inplace: If true, will return nothing and edit the argument. Otherwise, will preserve the argument
+        and return an edited copy
+    :return: The edited response (original or copy, depending on `inplace`)
+    """
+    response_filtered = deepcopy(response) if not inplace else response
+    _recursive_map_keys(response_filtered, keymap)
+    return response_filtered
+
+
+def _recursive_sort_dicts_by_key(response_elem: list | dict) -> None:
+    """
+    Iterates over all nested lists, dictionaries and basic values. Whenever a nested dictionary is found, it is
+    sorted by key.
+    :param response_elem: One of the sub-objects in the hierarchical storage of the response. Initially the root
+    :return: Nothing
+    """
+    if isinstance(response_elem, list):
+        for idx, el in enumerate(response_elem):
+            if isinstance(el, dict):
+                response_elem[idx] = sort_dict(el)
+            _recursive_sort_dicts_by_key(el)
+    elif isinstance(response_elem, dict):
+        for k, v in response_elem.items():
+            if isinstance(v, dict):
+                response_elem[k] = sort_dict(v)
+            _recursive_sort_dicts_by_key(v)
+
+
+def sort_dicts_by_key(response: list | dict, inplace: bool = True) -> list | dict | None:
+    """
+    Iterates over all nested lists, dictionaries and basic values. Whenever a nested dictionary is found, it is
+    sorted by key.
+    :param response: A parsed query response, consisting of nested lists, dicts and basic types (int, str)
+    :param inplace: If true, will return nothing and edit the argument. Otherwise, will preserve the argument
+        and return an edited copy
+    :return: Nothing, or an edited response, depending on `inplace`
+    """
+    response_filtered = deepcopy(response) if not inplace else response
+    _recursive_sort_dicts_by_key(response_filtered)
+    return response_filtered
+
+
+def clean_result(result: dict, drop_underscores_suds: bool = True, sort_keys: bool = False) -> dict:
+    """
+    :param result: the response dictionary to clean
+    :param drop_underscores_suds: if True, the keys of the dictionaries in the response will have leading
+        underscores removed in some cases (relevant for SUDS)
+    :param sort_keys: if True, the keys of the dictionaries in the response will be sorted (recursively)
+    """
+    result = deepcopy(result)
+    if drop_underscores_suds:
+        map_element_keys(result, {"_id": "id", "_classname": "classname", "_projectid": "projectid"}, inplace=True)
+    if sort_keys:
+        sort_dicts_by_key(result, inplace=True)
+
+    return result
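A short illustration of clean_result's two options on an assumed SUDS-style record:

    from bfabric.results.response_format_dict import clean_result

    rec = clean_result({"_id": 3, "sample": {"y": 1, "x": 2}}, drop_underscores_suds=True, sort_keys=True)
    # -> {"sample": {"x": 2, "y": 1}, "id": 3}: "_id" is renamed to "id" and the nested dict is sorted.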
diff --git a/bfabric/results/result_container.py b/bfabric/results/result_container.py
new file mode 100644
index 00000000..7f714b8c
--- /dev/null
+++ b/bfabric/results/result_container.py
@@ -0,0 +1,103 @@
+from __future__ import annotations
+
+import logging
+from typing import Any, TYPE_CHECKING, Iterable
+
+import bfabric.results.response_format_dict as formatter
+
+if TYPE_CHECKING:
+    import polars
+
+
+class ResultContainer:
+    """Container structure for query results."""
+
+    def __init__(
+        self, results: list[dict[str, Any]], total_pages_api: int | None = None, errors: list | None = None
+    ) -> None:
+        """
+        :param results: List of BFabric query results
+        :param total_pages_api: Maximal number of pages that were available for reading.
+            NOTE: The user may have requested to cap the total number of results, so it may be of interest to know
+            the (approximate) total number of results the API had for the query. The total number of results is
+            somewhere between (total_pages_api - 1) * BFABRIC_QUERY_LIMIT + 1 and total_pages_api * BFABRIC_QUERY_LIMIT
+            (e.g. for 3 pages with a limit of 100, between 201 and 300).
+        :param errors: List of errors that occurred during the query (if any)
+        """
+        self.results = results
+        self._total_pages_api = total_pages_api
+        self._errors = errors or []
+
+    def __getitem__(self, idx: int) -> dict[str, Any]:
+        return self.results[idx]
+
+    def __iter__(self) -> Iterable[dict[str, Any]]:
+        return iter(self.results)
+
+    def __repr__(self) -> str:
+        return self.__str__()
+
+    def __str__(self) -> str:
+        return str(self.to_list_dict())
+
+    def __len__(self) -> int:
+        return len(self.results)
+
+    def get_first_n_results(self, n_results: int | None) -> ResultContainer:
+        """Returns a shallow copy of self, containing at most `n_results` results."""
+        if n_results is None:
+            return self
+        else:
+            return ResultContainer(self.results[:n_results], total_pages_api=self._total_pages_api, errors=self._errors)
+
+    def assert_success(self) -> None:
+        """Asserts that the query was successful. Raises a `RuntimeError` if it was not."""
+        if not self.is_success:
+            raise RuntimeError("Query was not successful", self._errors)
+
+    @property
+    def is_success(self) -> bool:
+        """Whether the query was successful."""
+        return len(self._errors) == 0
+
+    @property
+    def errors(self) -> list:
+        """List of errors that occurred during the query. An empty list means the query was successful."""
+        return self._errors
+
+    def extend(self, other: ResultContainer, reset_total_pages_api: bool = False) -> None:
+        """Merges the results of `other` into this container.
+        :param other: The container whose elements to append to the end of this container
+        :param reset_total_pages_api: If True, the total_pages_api attribute will be reset to None
+        """
+        self.results += other.results
+        self._errors += other.errors
+        if reset_total_pages_api:
+            self._total_pages_api = None
+        elif self._total_pages_api != other.total_pages_api:
+            logging.warning(
+                f"Results observed with different total pages counts: "
+                f"{self._total_pages_api} != {other.total_pages_api}"
+            )
+
+    @property
+    def total_pages_api(self) -> int | None:
+        """Number of pages available from the API."""
+        return self._total_pages_api
+
+    def to_list_dict(self, drop_empty: bool = False) -> list[dict[str, Any]]:
+        """
+        Converts the results to a list of dictionaries.
+        :param drop_empty: If True, empty attributes will be removed from the results
+        """
+        if drop_empty:
+            return formatter.drop_empty_elements(self.results, inplace=False)
+        else:
+            return self.results
+
+    def to_polars(self, drop_empty: bool = False) -> polars.DataFrame:
+        """Returns the results as a polars DataFrame.
+        :param drop_empty: If True, empty attributes will be removed from the results
+        """
+        import polars
+
+        return polars.DataFrame(self.to_list_dict(drop_empty=drop_empty))
diff --git a/bfabric/scripts/__init__.py b/bfabric/scripts/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/scripts/bfabric_delete.py b/bfabric/scripts/bfabric_delete.py
index 643b8c4c..a29d2325 100755
--- a/bfabric/scripts/bfabric_delete.py
+++ b/bfabric/scripts/bfabric_delete.py
@@ -1,6 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: latin1 -*-
-
 """
 Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
 
 Christian Panse
 
 Licensed under GPL version 3
-
-$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_delete.py $
-$Id: bfabric_delete.py 2525 2016-10-17 09:52:59Z cpanse $
-
-
-
-http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl
-
 """
+import argparse
+import json
 
-import sys
-import bfabric
+import bfabric
+from bfabric import Bfabric
 
-if __name__ == "__main__":
-    bfapp = bfabric.Bfabric()
 
-    query_obj = {}
-
-    print (len(sys.argv))
+def bfabric_delete(client: Bfabric, endpoint: str, id: int) -> None:
+    """Deletes the object with id `id` from the `endpoint`."""
+    res = client.delete(endpoint=endpoint, id=id).to_list_dict()
+    print(json.dumps(res, indent=2))
 
-    endpoint = sys.argv[1]
 
-    if len(sys.argv) == 3:
-        id = sys.argv[2]
+def main() -> None:
+    """Parses arguments and calls `bfabric_delete`."""
+    client = Bfabric.from_config(verbose=True)
+    parser = argparse.ArgumentParser()
+    parser.add_argument("endpoint", help="endpoint", choices=bfabric.endpoints)
+    parser.add_argument("id", help="id", type=int)
+    args = parser.parse_args()
+    bfabric_delete(client=client, endpoint=args.endpoint, id=args.id)
 
-    if endpoint in bfabric.endpoints:
-        res = bfapp.delete_object(endpoint=endpoint, id=id)
-        for i in res:
-            print (i)
-    else:
-        raise "1st argument must be a valid endpoint."
+
+if __name__ == "__main__":
+    main()
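Because the deletion logic now lives in an importable function rather than inline under the main guard, it can also be exercised programmatically; a hypothetical call (the id is illustrative):

    from bfabric import Bfabric
    from bfabric.scripts.bfabric_delete import bfabric_delete

    client = Bfabric.from_config(verbose=True)
    bfabric_delete(client=client, endpoint="workunit", id=265829)  # prints the response as indented JSON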
diff --git a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py
index da5ce62c..fbd3a543 100755
--- a/bfabric/scripts/bfabric_executable_submitter_functionalTest.py
+++ b/bfabric/scripts/bfabric_executable_submitter_functionalTest.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python3
-# -*- coding: latin1 -*-
+#!/usr/bin/env python3
 
 """
 Submitter for B-Fabric functional test
@@ -31,21 +30,24 @@
 """
 
-#import os
-#import sys
+# import os
+# import sys
 
 from optparse import OptionParser
 
 
-def main():
-    parser = OptionParser(usage="usage: %prog -j <externaljobid>",
-                          version="%prog 1.0")
+def main() -> None:
 
-    parser.add_option("-j", "--externaljobid",
-                      type='int',
-                      action="store",
-                      dest="externaljobid",
-                      default=None,
-                      help="external job id is required.")
+    parser = OptionParser(usage="usage: %prog -j <externaljobid>", version="%prog 1.0")
+
+    parser.add_option(
+        "-j",
+        "--externaljobid",
+        type="int",
+        action="store",
+        dest="externaljobid",
+        default=None,
+        help="external job id is required.",
+    )
 
     (options, args) = parser.parse_args()
 
@@ -54,5 +56,6 @@ def main():
 
     print("Dummy submitter executable defined for the bfabricPy functional test")
 
+
 if __name__ == "__main__":
     main()
diff --git a/bfabric/scripts/bfabric_executable_submitter_gridengine.py b/bfabric/scripts/bfabric_executable_submitter_gridengine.py
index 6c8859b4..baef6e43 100755
--- a/bfabric/scripts/bfabric_executable_submitter_gridengine.py
+++ b/bfabric/scripts/bfabric_executable_submitter_gridengine.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python3
-# -*- coding: latin1 -*-
+#!/usr/bin/env python3
 
 """
 Submitter for B-Fabric
@@ -31,22 +30,25 @@
 """
 
-#import os
-#import sys
+# import os
+# import sys
 
 from optparse import OptionParser
 
 from bfabric import BfabricSubmitter
 
 
-def main():
-    parser = OptionParser(usage="usage: %prog -j <externaljobid>",
-                          version="%prog 1.0")
+def main() -> None:
 
-    parser.add_option("-j", "--externaljobid",
-                      type='int',
-                      action="store",
-                      dest="externaljobid",
-                      default=None,
-                      help="external job id is required.")
+    parser = OptionParser(usage="usage: %prog -j <externaljobid>", version="%prog 1.0")
+
+    parser.add_option(
+        "-j",
+        "--externaljobid",
+        type="int",
+        action="store",
+        dest="externaljobid",
+        default=None,
+        help="external job id is required.",
+    )
 
     (options, args) = parser.parse_args()
 
@@ -59,5 +61,6 @@ def main():
 
 # TODO(cp): fix that
 # print(bfapp.query_counter)
 
+
 if __name__ == "__main__":
     main()
diff --git a/bfabric/scripts/bfabric_executable_submitter_slurm.py b/bfabric/scripts/bfabric_executable_submitter_slurm.py
index b2d25762..bba8a5a3 100755
--- a/bfabric/scripts/bfabric_executable_submitter_slurm.py
+++ b/bfabric/scripts/bfabric_executable_submitter_slurm.py
@@ -1,5 +1,4 @@
-#!/usr/bin/env python3
-# -*- coding: latin1 -*-
+#!/usr/bin/env python3
 
 """
 Submitter for B-Fabric
@@ -32,22 +31,25 @@
 """
 
-#import os
-#import sys
+# import os
+# import sys
 
 from optparse import OptionParser
 
 from bfabric import BfabricSubmitter
 
 
-def main():
-    parser = OptionParser(usage="usage: %prog -j <externaljobid>",
-                          version="%prog 1.0")
+def main() -> None:
 
-    parser.add_option("-j", "--externaljobid",
-                      type='int',
-                      action="store",
-                      dest="externaljobid",
-                      default=None,
-                      help="external job id is required.")
+    parser = OptionParser(usage="usage: %prog -j <externaljobid>", version="%prog 1.0")
+
+    parser.add_option(
+        "-j",
+        "--externaljobid",
+        type="int",
+        action="store",
+        dest="externaljobid",
+        default=None,
+        help="external job id is required.",
+    )
 
     (options, args) = parser.parse_args()
@@ -60,5 +62,6 @@ def main():
 
 # TODO(cp): fix that
 # print(bfapp.query_counter)
 
+
 if __name__ == "__main__":
     main()
diff --git a/bfabric/scripts/bfabric_executable_wrappercreator.py b/bfabric/scripts/bfabric_executable_wrappercreator.py
index 42640a53..5f1d7e02 100755
--- a/bfabric/scripts/bfabric_executable_wrappercreator.py
+++ b/bfabric/scripts/bfabric_executable_wrappercreator.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: latin1 -*-
 
 """
 A wrapper_creator for B-Fabric
@@ -21,21 +20,19 @@
 # Licensed under GPL version 3
 #
 # $HeadURL: http://fgcz-svn/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $
-# $Id: wrapper_creator_yaml.py 2397 2016-09-06 07:04:35Z cpanse $
+# $Id: wrapper_creator_yaml.py 2397 2016-09-06 07:04:35Z cpanse $
 
-import os
 import sys
 
 from bfabric import BfabricWrapperCreator
 
 if __name__ == "__main__":
-    externaljobid = -1
 
-    if len(sys.argv) == 3 and sys.argv[1] == '-j' and int(sys.argv[2]) > 0:
+    if len(sys.argv) == 3 and sys.argv[1] == "-j" and int(sys.argv[2]) > 0:
         externaljobid = int(sys.argv[2])
     else:
-        print("usage: " + sys.argv[0] + " -j <externaljobid>")
+        print("usage: " + sys.argv[0] + " -j <externaljobid>")
         sys.exit(1)
 
     bfapp = BfabricWrapperCreator(externaljobid=externaljobid)
diff --git a/bfabric/scripts/bfabric_feeder_mascot.py b/bfabric/scripts/bfabric_feeder_mascot.py
index b599b997..e0eb201e 100755
--- a/bfabric/scripts/bfabric_feeder_mascot.py
+++ b/bfabric/scripts/bfabric_feeder_mascot.py
@@ -1,12 +1,5 @@
 #!/usr/bin/env python3
-# -*- coding: latin1 -*-
-
 """
-# $HeadURL: https://fgcz-svn.uzh.ch/repos/fgcz/computer/fgcz-s-018/bfabric-feeder/fgcz_dataFeederMascot.py $
-# $Id: fgcz_dataFeederMascot.py 9097 2021-02-05 15:38:38Z cpanse $
-# $Date: 2021-02-05 16:38:38 +0100 (Fri, 05 Feb 2021) $
-
-
 # Author
 2012-10-08 Christian Panse
 2012-10-10 Christian Panse
@@ -24,114 +17,109 @@
 3 */2 * * 1-6 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 7 2>&1 >/dev/null
 */7 5-22 * * 1-5 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 1 2>&1 >/dev/null
 """
+from __future__ import annotations
+
+import argparse
+import hashlib
+import itertools
+import json
 import os
 import re
 import sys
-import urllib
-import hashlib
-import getopt
-from suds.client import Client
+import urllib.parse
+from collections import Counter
 from datetime import datetime
-import json
-import itertools
-import http.client
-
-http.client.HTTPConnection._http_vsn = 10
-http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0'
+from pathlib import Path
+from typing import Any
+
+from suds.client import Client
 
-workuniturl = 'http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl'
+workuniturl = "http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl"
 clientWorkUnit = Client(workuniturl)
-BFLOGIN = 'pfeeder'
-BFPASSWORD = '!ForYourEyesOnly!'
+BFLOGIN = "pfeeder"
+BFPASSWORD = "!ForYourEyesOnly!"
 
-DB = dict()
-DBfilename = "{}/mascot.json".format(os.getenv("HOME"))
-DBwritten = False
+DB = {}
+DBfilename = Path.home() / "mascot.json"
 
 try:
-    DB = json.load(open(DBfilename))
-    print("Read {len} data items from {name} using {size:.1f} GBytes.".format(len=len(DB),
-                                                                              name=DBfilename,
-                                                                              size=sum(map(lambda x: int(x['resource']['size']), DB.values())) / (1024 * 1024 * 1024)))
-except:
-    print("loading '{}' failed".format(DBfilename))
+    with DBfilename.open() as file:
+        DB = json.load(file)
+    print(
+        "Read {len} data items from {name} using {size:.1f} GBytes.".format(
+            len=len(DB),
+            name=DBfilename,
+            size=sum(map(lambda x: int(x["resource"]["size"]), DB.values())) / (1024 * 1024 * 1024),
+        )
+    )
+except OSError:
+    print(f"loading '{DBfilename}' failed")
     pass
 
 
-def signal_handler(signal, frame):
-    print(("sys exit 1; signal=" + str(signal) + "; frame=" + str(frame)))
-    sys.exit(1)
-
-
-# TODO(cp): read .bfabricrc.py
-def read_bfabricrc():
-    with open(os.environ['HOME'] + "/.bfabricrc") as myfile:
-        for line in myfile:
-            return (line.strip())
-
-
-def query_mascot_result(f):
-    global DBwritten
-    regex2 = re.compile(".*.+/(data/.+\.dat)$")
-    regex2Result = regex2.match(f)
-    if True:
-        print("{} input>".format(datetime.now()))
-        print("\t{}".format(f))
-        if f in DB:
-            print("\thit")
-            wu = DB[f]
-            if 'workunitid' in wu:
-                print("\tdat file {} already registered as workunit id {}. continue ...".format(f, wu['workunitid']))
-                return
-            else:
-                print('\tno workunitid found')
-        else:
-            print("\tparsing mascot result file '{}'...".format(f))
-            wu = parse_mascot_result_file(f)
-            print("\tupdating cache '{}' file ...".format(DBfilename))
-            DBwritten = True
-            DB[f] = wu
-
-        if len(wu['inputresource']) > 0:
-            if re.search("autoQC4L", wu['name']) or re.search("autoQC01", wu['name']):
-                print("WARNING This script ignores autoQC based mascot dat file {}.".format(f))
-                return
-
-            print("\tquerying bfabric ...")
-
-            # jsut in case
-            if 'errorreport' in wu:
-                del (wu['errorreport'])
-
-            try:
-                resultClientWorkUnit = clientWorkUnit.service.checkandinsert(
-                    dict(login=BFLOGIN, password=BFPASSWORD, workunit=wu))
-            except ValueError:
-                print("Exception {}".format(ValueError))
-                raise
-
-            try:
-                rv = resultClientWorkUnit.workunit[0]
-            except ValueError:
-                print("Exception {}".format(ValueError))
-                raise
-
-            print("{} output>".format(datetime.now()))
-            if 'errorreport' in rv:
-                print("\tfound errorreport '{}'.".format(rv['errorreport']))
-
-            if '_id' in rv:
-                wu['workunitid'] = rv['_id']
-                print("\tfound workunitid'{}'.".format(wu['workunitid']))
-                DB[f] = wu
-                DBwritten = True
-
-            if not '_id' in rv and not 'errorreport' in rv:
-                print("something went wrong.")
-                raise
-    # print(resultClientWorkUnit)
-    # print("exception for file {} with error {}".format(f, e))
-    return
file {file_path}.") + return + + print("\tquerying bfabric ...") + + # just in case + if "errorreport" in wu: + del wu["errorreport"] + + try: + resultClientWorkUnit = clientWorkUnit.service.checkandinsert( + dict(login=BFLOGIN, password=BFPASSWORD, workunit=wu) + ) + except ValueError: + print(f"Exception {ValueError}") + raise + + try: + rv = resultClientWorkUnit.workunit[0] + except ValueError: + print(f"Exception {ValueError}") + raise + + print(f"{datetime.now()} output>") + if "errorreport" in rv: + print("\tfound errorreport '{}'.".format(rv["errorreport"])) + + if "_id" in rv: + wu["workunitid"] = rv["_id"] + print("\tfound workunitid'{}'.".format(wu["workunitid"])) + DB[file_path] = wu + db_written = True + + if "_id" not in rv and "errorreport" not in rv: + print("something went wrong.") + raise + # print(resultClientWorkUnit) + # print("exception for file {} with error {}".format(f, e)) + + return db_written """ @@ -185,35 +173,33 @@ def query_mascot_result(f): """ -def parse_mascot_result_file(f): - +def parse_mascot_result_file(file_path: str) -> dict[str, Any]: # Getting the current date and time - print("{} DEBUG parse_mascot_result_file".format(datetime.now())) + print(f"{datetime.now()} DEBUG parse_mascot_result_file") regex0 = re.compile("^title=.*(p([0-9]+).+Proteomics.*(raw|RAW|wiff)).*") - regex3 = re.compile("^(FILE|COM|release|USERNAME|USERID|TOL|TOLU|ITOL|ITOLU|MODS|IT_MODS|CHARGE|INSTRUMENT|QUANTITATION|DECOY)=(.+)$") - - # control_chars = ''.join(map(chr, [range(0x00, 0x20) , range(0x7f, 0xa0)])) - control_chars = ''.join(map(chr, itertools.chain(range(0x00, 0x20), range(0x7f, 0xa0)))) + regex3 = re.compile( + "^(FILE|COM|release|USERNAME|USERID|TOL|TOLU|ITOL|ITOLU|MODS|IT_MODS|CHARGE|INSTRUMENT|QUANTITATION|DECOY)=(.+)$" + ) - control_char_re = re.compile('[%s]' % re.escape(control_chars)) + control_chars = "".join(map(chr, itertools.chain(range(0x00, 0x20), range(0x7F, 0xA0)))) + control_char_re = re.compile(f"[{re.escape(control_chars)}]") line_count = 0 - meta_data_dict = dict(COM='', FILE='', release='', relativepath=f.replace('/usr/local/mascot/', '')) + meta_data_dict = dict(COM="", FILE="", release="", relativepath=file_path.replace("/usr/local/mascot/", "")) inputresourceHitHash = dict() inputresourceList = list() md5 = hashlib.md5() project = -1 desc = "" - with open(f) as dat: + with Path(file_path).open() as dat: for line in dat: line_count = line_count + 1 md5.update(line.encode()) # check if the first character of the line is a 't' for title to save regex time - if line[0] == 't': - # result = regex0.match(urllib.url2pathname(line.strip()).replace('\\', "/").replace("//", "/")) - result = regex0.match(urllib.parse.unquote(line.strip()).replace('\\', "/").replace("//", "/")) - if result and not result.group(1) in inputresourceHitHash: + if line[0] == "t": + result = regex0.match(urllib.parse.unquote(line.strip()).replace("\\", "/").replace("//", "/")) + if result and result.group(1) not in inputresourceHitHash: inputresourceHitHash[result.group(1)] = result.group(2) inputresourceList.append(dict(storageid=2, relativepath=result.group(1))) project = result.group(2) @@ -228,78 +214,78 @@ def parse_mascot_result_file(f): desc = desc + result.group(1) + "=" + result.group(2) + "; " meta_data_dict[result.group(1)] = result.group(2) - desc = desc.encode('ascii', errors='ignore') - - name = "{}; {}".format(meta_data_dict['COM'], os.path.basename(meta_data_dict['relativepath']))[:255] - + desc = desc.encode("ascii", errors="ignore") + name = 
f"{meta_data_dict['COM']}; {os.path.basename(meta_data_dict['relativepath'])}"[:255] rv = dict( applicationid=19, containerid=project, - name=control_char_re.sub('', name), - description=control_char_re.sub('', desc.decode()), + name=control_char_re.sub("", name), + description=control_char_re.sub("", desc.decode()), inputresource=inputresourceList, resource=dict( - name=meta_data_dict['relativepath'], + name=meta_data_dict["relativepath"], storageid=4, - status='available', - relativepath=meta_data_dict['relativepath'], - size=os.path.getsize(f), - filechecksum=md5.hexdigest() - ) + status="available", + relativepath=meta_data_dict["relativepath"], + size=os.path.getsize(file_path), + filechecksum=md5.hexdigest(), + ), ) - #TODO + # TODO - print("{}".format(datetime.now())) + print(f"{datetime.now()}") print(rv) print("DEBUG END") - return (rv) - + return rv -def printFrequency(S): - count = dict() - for x in S: - if x in count: - count[x] = count[x] + 1 - else: - count[x] = 1 - for key in sorted(count.keys(), key=lambda key: int(key)): - print("p{}\t{}".format(key, count[key])) +def print_project_frequency(project_numbers: list[int | str]) -> None: + """Prints the frequency of the project numbers in the list, assuming they are either integers or strings of + individual integers.""" + count = Counter(project_numbers) + for key in sorted(count.keys(), key=int): + print(f"p{key}\t{count[key]}") -def statistics(): +def print_statistics() -> None: + """Prints statistics about the provided database.""" print("Statistics ...") - print("len(DB)\t=\t{}".format(len(DB))) - printFrequency(map(lambda x: x['containerid'], DB.values())) - print("file size\t=\t{} GBytes".format(sum(map(lambda x: int(x['resource']['size']), DB.values())) / (1024 * 1024 * 1024))) + print(f"len(DB)\t=\t{len(DB)}") + print_project_frequency(map(lambda x: x["containerid"], DB.values())) + print( + "file size\t=\t{} GBytes".format( + sum(map(lambda x: int(x["resource"]["size"]), DB.values())) / (1024 * 1024 * 1024) + ) + ) - # printFrequency(map(lambda x: x['description'].split(";"), DB.values())) - # print(json.dumps(list(DB.values())[100], indent=4)) +def main() -> None: + """Parses the CLI arguments and calls the appropriate functions.""" + parser = argparse.ArgumentParser() + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument("--stdin", action="store_true", help="read file names from stdin") + group.add_argument("--file", type=str, help="processes the provided file") + parser.add_argument("--statistics", action="store_true", help="print statistics") -if __name__ == "__main__": - BFPASSWORD = read_bfabricrc() - try: - opts, args = getopt.getopt(sys.argv[1:], "f:s", ["file=", "stdin", "statistics"]) - except getopt.GetoptError as err: - print(str(err)) - sys.exit(2) - - for o, value in opts: - if o == "--stdin": - print("reading file names from stdin ...") - for f in sys.stdin.readlines(): - query_mascot_result(f.strip()) - elif o == "--file" or o == '-f': - print("processesing", value, "...") - query_mascot_result(value) - elif o == "--statistics" or o == '-s': - statistics() - sys.exit(0) - -if DBwritten: - print("dumping json file '{}' ...".format(DBfilename)) - json.dump(DB, open(DBfilename, 'w'), sort_keys=True, indent=4) - sys.exit(0) + args = parser.parse_args() + + db_written = False + if args.stdin: + print("reading file names from stdin ...") + for filename in sys.stdin.readlines(): + db_written = query_mascot_result(filename.strip()) or db_written + elif args.file: + 
print("processesing", args.file, "...") + db_written = query_mascot_result(args.file) + if args.statistics: + print_statistics() + if db_written: + print(f"dumping json file '{DBfilename}' ...") + with DBfilename.open("w") as file: + json.dump(DB, file, sort_keys=True, indent=4) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_feeder_resource_autoQC.py b/bfabric/scripts/bfabric_feeder_resource_autoQC.py index 841c9108..6a8c1d4f 100755 --- a/bfabric/scripts/bfabric_feeder_resource_autoQC.py +++ b/bfabric/scripts/bfabric_feeder_resource_autoQC.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 """ feeds autoQC runs into bfabric @@ -12,97 +12,91 @@ | ./bfabric/scripts/bfabric_feeder_resource_autoQC.py """ -import sys import os -import yaml import re +import sys import time import unittest + from bfabric import Bfabric -class autoQC(): +class AutoQC: """ - feeder for autoQC raw files + feeder for autoQC raw files """ - bfabric_storageid = 2 - configfile = os.path.normpath("{0}/{1}".format(os.path.expanduser('~'), r'.bfabricrc.yaml')) - with open(configfile, 'r') as file: - config = yaml.load(file, Loader=yaml.FullLoader) - bfabric_application_ids = config['applicationId'] - bfapp = Bfabric(verbose=False) + def __init__(self) -> None: + self.bfabric_storageid = 2 + self.client = Bfabric.from_config(verbose=True) + self.bfabric_application_ids = self.client.config.application_ids @property - def getId(self, obj): - print ("==============") - print (obj) - - try: - print ("DEBGUG obj: {}".format(obj[0]._id)) - return int(obj[0]._id) - except: - raise - - def __init__(self): - pass + def get_id(self, obj) -> int: + print("==============") + print(obj) + print(f"DEBGUG obj: {obj[0]._id}") + return int(obj[0]._id) - def sample_check(self, projectid, name): + def sample_check(self, projectid: int, name: str): """ checks wether a S exists or not. if not the S is created. :param projectid: :param name: :return: SID """ - try: - res = self.bfapp.read_object(endpoint='sample', - obj={'containerid': projectid, 'name': name}) - except: + res = self.client.read(endpoint="sample", obj={"containerid": projectid, "name": name}).to_list_dict() + except Exception: print(res) raise + sample_type = "Biological Sample - Proteomics" + + query_autoQC01 = { + "name": f"{name}", + "type": sample_type, + "containerid": projectid, + "species": "Bos taurus", + "groupingvar": "A", + "samplingdate": "2018-11-15", + "description": "core4life standard: sample BSA + iRT 1:800", + } + + query_autoQC4L = { + "name": f"{name}", + "type": sample_type, + "containerid": projectid, + "species": "n/a", + "groupingvar": "A", + "samplingdate": "2018-11-15", + "description": "core4life standard: 6 x 5 LC-MS/MS Peptide Reference Mix", + } + + query_lipidQC01 = { + "name": f"{name}", + "type": "Biological Sample - Metabolomics", + "containerid": projectid, + "species": "n/a", + "extractionprotocolannotation": "n/a", + "organismpart": "n/a", + "compoundclass": "Lipids", + "description": "Lipidmix containing 2uM of FFA, BA, LPC. 
-        sample_type = 'Biological Sample - Proteomics'
-
-        query_autoQC01 = {'name': "{}".format(name),
-                          'type': sample_type,
-                          'containerid': projectid,
-                          'species': "Bos taurus",
-                          'groupingvar': "A",
-                          'samplingdate': "2018-11-15",
-                          'description': 'core4life standard: sample BSA + iRT 1:800'}
-
-        query_autoQC4L = {'name': "{}".format(name),
-                          'type': sample_type,
-                          'containerid': projectid,
-                          'species': "n/a",
-                          'groupingvar': "A",
-                          'samplingdate': "2018-11-15",
-                          'description': 'core4life standard: 6 x 5 LC-MS/MS Peptide Reference Mix'}
-
-        query_lipidQC01 = {'name': "{}".format(name),
-                          'type': 'Biological Sample - Metabolomics',
-                          'containerid': projectid,
-                          'species': "n/a",
-                          'extractionprotocolannotation': "n/a",
-                          'organismpart': "n/a",
-                          'compoundclass': "Lipids",
-                          'description': 'Lipidmix containing 2uM of FFA, BA, LPC. positive mode, C18.'}
-
-        if res is None:
-            if name == 'autoQC4L':
-                res = self.bfapp.save_object(endpoint='sample', obj=query_autoQC4L)
-            elif name == 'autoQC01':
-                res = self.bfapp.save_object(endpoint='sample', obj=query_autoQC01)
-            elif name == 'lipidQC01':
-                res = self.bfapp.save_object(endpoint='sample', obj=query_lipidQC01)
+        if not res:
+            if name == "autoQC4L":
+                res = self.client.save(endpoint="sample", obj=query_autoQC4L).to_list_dict()
+            elif name == "autoQC01":
+                res = self.client.save(endpoint="sample", obj=query_autoQC01).to_list_dict()
+            elif name == "lipidQC01":
+                res = self.client.save(endpoint="sample", obj=query_lipidQC01).to_list_dict()
 
         print(res)
         print(res[0])
 
-        return res[0]._id
+        return res[0]["id"]
 
-    def workunit_check(self, projectid, name, applicationid):
+    def workunit_check(self, projectid: int, name: str, applicationid: int):
         """
         checks whether a WU exists or not. if not, the WU is created.
@@ -113,11 +107,8 @@ def workunit_check(self, projectid, name, applicationid):
         :return: int WUID
         """
-        query = {'projectid': projectid, 'name': name, 'applicationid': applicationid}
-        try:
-            res = self.bfapp.read_object(endpoint='workunit', obj=query)
-        except:
-            raise
+        query = {"projectid": projectid, "name": name, "applicationid": applicationid}
+        res = self.client.read(endpoint="workunit", obj=query).to_list_dict()
 
         description = """
         contains automatic registered quality control (QC)
@@ -126,35 +117,38 @@ def workunit_check(self, projectid, name, applicationid):
         listed below.
         """
 
-        if name == 'autoQC4L':
-            links = ['http://fgcz-ms.uzh.ch/~cpanse/autoQC4L.html',
-                     'http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/',
-                     'http://qcloud.crg.eu',
-                     'https://panoramaweb.org']
-        elif name == 'autoQC01':
-            links = ['http://fgcz-ms.uzh.ch/~cpanse/autoQC01.html',
-                     'http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/',
-                     'http://qcloud.crg.eu',
-                     'https://panoramaweb.org']
-        elif name == 'lipidQC01':
+        if name == "autoQC4L":
+            links = [
+                "http://fgcz-ms.uzh.ch/~cpanse/autoQC4L.html",
+                "http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/",
+                "http://qcloud.crg.eu",
+                "https://panoramaweb.org",
+            ]
+        elif name == "autoQC01":
+            links = [
+                "http://fgcz-ms.uzh.ch/~cpanse/autoQC01.html",
+                "http://fgcz-ms-shiny.uzh.ch:8080/bfabric_rawDiag/",
+                "http://qcloud.crg.eu",
+                "https://panoramaweb.org",
+            ]
+        elif name == "lipidQC01":
             description = "Contains automatic registered quality control (QC) measurements, positive mode."
-            links = ['http://fgcz-ms.uzh.ch/~cpanse/lipidQC01.html']
+            links = ["http://fgcz-ms.uzh.ch/~cpanse/lipidQC01.html"]
 
-        if res is None:
-            query = {'projectid': projectid, 'name': name,
-                     'applicationid': applicationid,
-                     'description': description,
-                     'link': links}
-
-            res = self.bfapp.save_object(endpoint='workunit',
-                                         obj=query)
+        if not res:
+            query = {
+                "projectid": projectid,
+                "name": name,
+                "applicationid": applicationid,
+                "description": description,
+                "link": links,
+            }
 
-        else:
-            pass
+            res = self.client.save(endpoint="workunit", obj=query).to_list_dict()
 
-        return res[0]._id
+        return res[0]["id"]
 
-    def resource_check(self, projectid, name, workunitid, filename, filedate, size, md5, sampleid):
+    def resource_check(self, projectid: int, name: str, workunitid: int, filename: str, filedate, size, md5, sampleid):
         """
         checks whether a R exists or not. if not, the R is created.
         :param projectid:
@@ -172,88 +166,80 @@ def resource_check(self, projectid, name, workunitid, filename, filedate, size,
 
         _file_date = time.strftime("%FT%H:%M:%S-01:00", time.gmtime(int(filedate)))
 
         query = {
-            'filechecksum': md5,
-            'workunitid': workunitid,
-            'projectid': projectid,
+            "filechecksum": md5,
+            "workunitid": workunitid,
+            "projectid": projectid,
         }
 
-        try:
-            res = self.bfapp.read_object(endpoint='resource', obj=query)
-        except:
-            raise
+        res = self.client.read(endpoint="resource", obj=query).to_list_dict()
 
-        if res is None:
+        if not res:
             query = {
-                'workunitid': workunitid,
-                'sampleid': sampleid,
-                'filechecksum': md5,
-                'relativepath': filename,
-                'name': os.path.basename(filename),
-                'status': 'available',
-                'size': size,
-                'storageid': self.bfabric_storageid
+                "workunitid": workunitid,
+                "sampleid": sampleid,
+                "filechecksum": md5,
+                "relativepath": filename,
+                "name": os.path.basename(filename),
+                "status": "available",
+                "size": size,
+                "storageid": self.bfabric_storageid,
             }
-            res = self.bfapp.save_object(endpoint='resource', obj=query)
-
-            query = {'id': workunitid, 'status': 'available'}
-            res2 = self.bfapp.save_object(endpoint='workunit', obj=query)
+            res = self.client.save(endpoint="resource", obj=query).to_list_dict()
 
-        return res[0]._id
+            query = {"id": workunitid, "status": "available"}
+            self.client.save(endpoint="workunit", obj=query).to_list_dict()
 
+        return res[0]["id"]
 
-    def feed(self, line):
+    def feed(self, line) -> None:
         """
         feeds one line example:
         :param line:
         :return:
         """
         try:
             (_md5, _file_date, _file_size, filename) = line.split(";")
-        except Exception as err:
+        except Exception:
             return
 
         try:
-            m = re.search(r"p([0-9]+)\/((Metabolomics|Proteomics)\/[A-Z]+_[1-9])\/.*(autoQC01|autoQC4L|lipidQC01).+raw$",
-                          filename)
+            m = re.search(
+                r"p([0-9]+)\/((Metabolomics|Proteomics)\/[A-Z]+_[1-9])\/.*(autoQC01|autoQC4L|lipidQC01).+raw$", filename
+            )
 
             projectid = m.group(1)
             applicationid = self.bfabric_application_ids[m.group(2)]
             autoQCType = m.group(4)
-
-        except Exception as err:
-            print ("# no match '{}'.".format(filename))
+        except Exception:
+            print(f"# no match '{filename}'.")
             return
 
         print(f"{projectid}\t{applicationid}\t{autoQCType}\n")
 
         try:
             sampleid = self.sample_check(projectid, name=autoQCType)
             sys.exit(0)
-            #print sampleid
+            # print sampleid
 
             workunitid = self.workunit_check(projectid, name=autoQCType, applicationid=applicationid)
-            #print "WUID={}".format(workunitid)
-
-            resourceid = self.resource_check(projectid=projectid, name=os.path.basename(filename),
-                                             workunitid=workunitid,
-                                             filename=filename,
-                                             filedate=_file_date,
-                                             size=_file_size,
-                                             md5=_md5,
-                                             sampleid=sampleid)
+            # print "WUID={}".format(workunitid)
+
+            resourceid = self.resource_check(
+                projectid=projectid,
+                name=os.path.basename(filename),
+                workunitid=workunitid,
+                filename=filename,
+                filedate=_file_date,
+                size=_file_size,
+                md5=_md5,
+                sampleid=sampleid,
+            )
 
             # sampleid=0
-            print ("p{p}\tA{A}\t{filename}\tS{S}\tWU{WU}\tR{R}".format(p=projectid,
-                                                                       A=applicationid,
-                                                                       filename=filename,
-                                                                       S=sampleid,
-                                                                       WU=workunitid,
-                                                                       R=resourceid))
+            print(f"p{projectid}\tA{applicationid}\t{filename}\tS{sampleid}\tWU{workunitid}\tR{resourceid}")
 
         except Exception as err:
-            print('# Failed to register to bfabric: {}'.format(err))
+            print(f"# Failed to register to bfabric: {err}")
 
 
 class TestCaseAutoQC(unittest.TestCase):
@@ -261,21 +247,22 @@ class TestCaseAutoQC(unittest.TestCase):
     python -m unittest bfabric_feeder_resource_autoQC
     """
 
-    BF = autoQC()
-    def setUp(self):
+    BF = AutoQC()
+
+    def setUp(self) -> None:
         pass
 
-    def test_feed(self):
+    def test_feed(self) -> None:
         line = "61cf7e172713344bdf6ebe5b1ed61d99;1549963879;306145606;p2928/Proteomics/QEXACTIVEHF_2/ciuffar_20190211_190211_TNF_PRM_rT_again_AQUA_LHration/20190211_013_autoQC4L.raw"
-        #self.BF.feed(line)
+        # self.BF.feed(line)
         line = "efdf5e375d6e0e4e4abf9c2b3e1e97d5;1542134408;59129652;p1000/Proteomics/QEXACTIVEHF_2/tobiasko_20181113/20181113_003_autoQC01.raw"
-        #self.BF.feed(line)
+        # self.BF.feed(line)
         line = "d0412c1aae029d21bb261c1e4c682ea9;1549441215;207803452;p2947/Metabolomics/QEXACTIVE_3/sstreb_20190206_o5292/p2947_o5292_20190205_FFA_BA_LPC_2um_lipidQC01_1.raw"
         self.BF.feed(line)
 
 
-if __name__ == '__main__':
-    BF = autoQC()
+if __name__ == "__main__":
+    BF = AutoQC()
 
     for input_line in sys.stdin:
         BF.feed(input_line.rstrip())
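The semicolon-separated line format that feed() expects is easiest to see with a line taken from the unit test above; a minimal sketch:

    feeder = AutoQC()
    # fields: md5 checksum; file date (unix epoch); size in bytes; file path
    feeder.feed("d0412c1aae029d21bb261c1e4c682ea9;1549441215;207803452;p2947/Metabolomics/QEXACTIVE_3/sstreb_20190206_o5292/p2947_o5292_20190205_FFA_BA_LPC_2um_lipidQC01_1.raw")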
""" - -import base64 +from __future__ import annotations +import os import json -from flask import Flask, jsonify, request -from flask.json import JSONEncoder -from slugify import slugify -from os.path import exists - -import bfabric - import logging import logging.handlers -from flask.logging import default_handler +from pathlib import Path +from typing import Any +from flask import Flask, Response, jsonify, request -def create_logger(name="bfabric11_flask", address=("fgcz-ms.uzh.ch", 514)): - """ - create a logger object - """ - syslog_handler = logging.handlers.SysLogHandler(address=address) - formatter = logging.Formatter('%(name)s %(message)s') - syslog_handler.setFormatter(formatter) +from bfabric import Bfabric, BfabricAuth - logger = logging.getLogger(name) - logger.setLevel(20) - logger.addHandler(syslog_handler) - - - return logger - -logger = create_logger() - -class BfabricJSONEncoder(JSONEncoder): - """ - enables to serialize (jsonify) bfabric wsdl objects - """ - - def default(self, obj): - try: - iterable = iter(obj) - except TypeError: - pass - else: - return(dict(iterable)) - - return JSONEncoder.default(self, obj) - - -address=("fgcz-ms.uzh.ch", 514) -name="bfabric_flask" -formatter = logging.Formatter('%(name)s %(message)s') - -syslog_handler = logging.handlers.SysLogHandler(address=address) -syslog_handler.setFormatter(formatter) +if "BFABRICPY_CONFIG_ENV" not in os.environ: + # Set the environment to the name of the PROD config section to use + os.environ["BFABRICPY_CONFIG_ENV"] = "TEST" +DEFAULT_LOGGER_NAME = "bfabric13_flask" +logger = logging.getLogger(DEFAULT_LOGGER_NAME) app = Flask(__name__) +client = Bfabric.from_config(auth=None, verbose=True) -app.json_encoder = BfabricJSONEncoder -bfapp = bfabric.Bfabric() - -inlcude_child_extracts = True - -@app.route('/read', methods=['GET', 'POST']) -def read(): - idonly = None - try: - content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.'}) - - try: - # TODO(cp): check if meaningful page - page = content['page'][0] - print("page = ", page) - except: - logger.info("set page to 1.") - page = 1 - - try: - # TODO(cp): check if meaningful page - idonly = content['idonly'][0] - print("idonly = ", idonly) - except: - idonly = False - - try: - webservicepassword = content['webservicepassword'][0].replace("\t", "") - login = content['login'][0] - # logger.info("debug {}".format(webservicepassword)) - - bf = bfabric.Bfabric(login=login, password=webservicepassword) - res = bf.read_object(endpoint=content['endpoint'][0], obj=content['query'], plain=True, page=page, idonly=idonly) - logger.info("'{}' login success query {} ...".format(login, content['query'])) - except: - logger.info("'{}' query failed ...".format(login)) - return jsonify({'status': 'jsonify failed: bfabric python module.'}) - - try: - return jsonify({'res': res}) - except: - logger.info("'{}' query failed ...".format(login)) - return jsonify({'status': 'jsonify failed'}) - - -""" -generic query interface for read interface - -example (assumes the proxy runs on localhost): - -R> rv <- POST('http://localhost:5000/query', - body = toJSON(list(login = login, - webservicepassword = webservicepassword, - query = 'resource', - containerid = 3000, - applicationid = 205)), - encode = 'json') - -R> rv <- content(rv) - -TODO(cp@fgcz.ethz.ch): also provide an argument for the webbase -""" -@app.route('/q', methods=['GET', 'POST']) -def q(): - try: - content = json.loads(request.data) - except: - return jsonify({'error': 'could not 
get POST content.'}) - - try: - # TODO(cp): check if meaningful page - page = content['page'][0] - except: - logger.info("set page to 1.") - page = 1 - - # TODO(cp): more finetuning on paging - try: - webservicepassword = content['webservicepassword'][0].replace("\t", "") - login = content['login'][0] - #logger.info("debug {}".format(webservicepassword)) - - - bf = bfabric.Bfabric(login=login, password=webservicepassword) - res = bf.read_object(endpoint=content['endpoint'][0], obj=content['query'], page=page) - logger.info("'{}' login success query {} ...".format(login, content['query'])) - except: - logger.info("'{}' login failed ...".format(login)) - return jsonify({'status': 'jsonify failed: bfabric python module.'}) - - try: - return jsonify({'res': res}) - except: - logger.info("'{}' query failed ...".format(login)) - return jsonify({'status': 'jsonify failed'}) - - -@app.route('/s', methods=['GET', 'POST']) -def s(): - - try: - content = json.loads(request.data) - except: - msg = 'could not get POST content.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) - - try: - webservicepassword = content['webservicepassword'][0].replace("\t", "") - login = content['login'][0] - except: - msg = 'Could not extract login|webservicepassword.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) - - try: - endpoint = content['endpoint'][0] - except: - msg = 'Could not extract endpoint.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) - - try: - query = content['query'] - except: - msg = 'Could not extract query.' - print("Exception: {}".format(msg)) - return jsonify({'error': msg}) - - try: - print("Calling constructor and save method using login", login) - bf = bfabric.Bfabric(login=login, password=webservicepassword) - res = bf.save_object(endpoint=endpoint, obj=content['query']) - - logger.info("'{}' login success save method ...".format(login)) - except: - logger.info("save method failed for login {}.".format(login)) - return jsonify({'status': 'jsonify failed: bfabric python module.'}) - - try: - return jsonify({'res': res}) - except: - return jsonify({'status': 'jsonify failed'}) - -def dfs__(extract_id): - stack = list() - visited = dict() - stack.append(extract_id) - - extract_dict = dict() - - while len(stack) > 0: - o = stack.pop() - visited[u] = True - - - extract = bfapp.read_object(endpoint='extract', obj={'id': u}) - extract_dict[u] = extract[0] - - try: - for child_extract in extract[0].childextract: - if (child_extract._id not in visited): - - stack.append(child_extract._id) - - except: - pass - - return extract_dict - - -#def wsdl_sample(containerid): -# try: -# return map(lambda x: {'id': x._id, 'name': x.name}, -# bfapp.read_object(endpoint='sample', obj={'containerid': containerid})) -# except: -# pass -def compose_ms_queue_dataset(jsoncontent, workunitid, containerid): - obj = {} - try: - obj['name'] = 'generated through http://fgcz-s-028.uzh.ch:8080/queue_generator/' - obj['workunitid'] = workunitid - obj['containerid'] = containerid - obj['attribute'] = [ - {'name': 'File Name', 'position':1, 'type':'String'}, - {'name': 'Condition', 'position':2, 'type':'String'}, - {'name': 'Path', 'position': 3}, - {'name': 'Position', 'position': 4}, - {'name': 'Inj Vol', 'position': 5, 'type': 'numeric'}, - {'name': 'ExtractID', 'position': 6, 'type': 'extract'} ] +def get_request_auth(request_data: dict[str, Any]) -> BfabricAuth: + """Extracts the login and password from a JSON request body. 
Assumes it has been filtered beforehand.""" + webservicepassword = request_data["webservicepassword"].replace("\t", "") + login = request_data["login"] + return BfabricAuth(login=login, password=webservicepassword) - obj['item'] = list() - for idx in range(0, len(jsoncontent)): - obj['item'].append({'field': map(lambda x: {'attributeposition': x + 1, 'value': jsoncontent[idx][x]}, range(0, len(jsoncontent[idx]))), 'position': idx + 1}) +@app.errorhandler(Exception) +def handle_unknown_exception(e: Exception) -> Response: + """Handles exceptions which are not handled by a more specific handler.""" + logger.error("Unknown exception", exc_info=e) + return jsonify({"error": f"unknown exception occurred: {e}"}) - except: - pass - return obj +@app.errorhandler(json.JSONDecodeError) +def handle_json_decode_error(e: json.JSONDecodeError) -> Response: + """Handles JSON decode errors.""" + logger.error("JSON decode error", exc_info=e) + return jsonify({"error": "could not parse JSON request content"}) -@app.route('/add_resource', methods=['POST']) -def add_resource(): - try: - queue_content = json.loads(request.data) - print (queue_content) - print("--") - except: - print ("failed: could not get POST content") - return jsonify({'error': 'could not get POST content.'}) +class InvalidRequestContent(RuntimeError): + """Raised when the request content is invalid.""" - res = bfapp.save_object('workunit', {'name': queue_content['name'], - 'description': "{}".format(queue_content['workunitdescription'][0]), - 'containerid': queue_content['containerid'], - 'applicationid': queue_content['applicationid'] - }) - print (res) + def __init__(self, missing_fields: list[str]) -> None: + super().__init__(f"missing fields: {missing_fields}") - workunit_id = res[0]._id - print (workunit_id) +@app.errorhandler(InvalidRequestContent) +def handle_invalid_request_content(e: InvalidRequestContent) -> Response: + """Handles invalid request content errors.""" + logger.error("Invalid request content", exc_info=e) + return jsonify({"error": f"invalid request content: {e}"}) - res = bfapp.save_object('resource', {'base64': queue_content['base64'], - 'name': queue_content['resourcename'], - 'workunitid': workunit_id}) - res = bfapp.save_object('workunit', {'id': workunit_id, 'status': 'available'}) +def get_fields(required_fields: list[str], optional_fields: dict[str, Any]) -> dict[str, Any]: + """Extracts fields from a JSON request body. All `required_fields` must be present, or an error will be raised + indicating the missing fields. The optional fields are filled with the default values if not present. 
+ :param required_fields: list of required fields + :param optional_fields: dictionary of optional fields with default values + :return: dictionary of all field values, if all required fields are present + """ + available_fields = request.json.keys() + missing_fields = set(required_fields) - set(available_fields) + if missing_fields: + raise InvalidRequestContent(sorted(missing_fields)) + else: + required_values = {field: request.json[field] for field in required_fields} + optional_values = {field: request.json.get(field, default) for field, default in optional_fields.items()} + return {**required_values, **optional_values} + + +@app.route("/read", methods=["POST"]) +def read() -> Response: + """Reads data from a particular B-Fabric endpoint matching a query.""" + params = get_fields( + required_fields=["endpoint", "login", "webservicepassword"], + optional_fields={"query": {}, "page_offset": 0, "page_max_results": 100}, + ) + query = params["query"] + page_offset = params["page_offset"] + page_max_results = params["page_max_results"] + endpoint = params["endpoint"] + auth = get_request_auth(params) + + logger.info(f"'{auth.login}' /read {page_offset=}, {page_max_results=}, {query=}") + with client.with_auth(auth): + client.print_version_message() + res = client.read( + endpoint=endpoint, + obj=query, + offset=page_offset, + max_results=page_max_results, + ) + logger.info(f"'{auth.login}' login success query {query} ...") + + return jsonify({"res": res.to_list_dict()}) + + +@app.route("/save", methods=["POST"]) +def save() -> Response: + """Saves data to a particular B-Fabric endpoint.""" + params = get_fields(required_fields=["endpoint", "query", "login", "webservicepassword"], optional_fields={}) + endpoint = params["endpoint"] + query = params["query"] + auth = get_request_auth(params) + + with client.with_auth(auth): + res = client.save(endpoint=endpoint, obj=query) + logger.info(f"'{auth.login}' login success save method ...") + + return jsonify({"res": res.to_list_dict()}) + + +@app.route("/add_resource", methods=["POST"]) +def add_resource() -> Response: + """Adds a resource to a workunit.""" + params = get_fields( + required_fields=[ + "name", + "workunitdescription", + "containerid", + "applicationid", + "base64", + "resourcename", + "login", + "webservicepassword", + ], + optional_fields={}, + ) + auth = get_request_auth(params) + + # Save the workunit + with client.with_auth(auth): + res = client.save( + "workunit", + { + "name": params["name"], + "description": params["workunitdescription"], + "containerid": params["containerid"], + "applicationid": params["applicationid"], + }, + ).to_list_dict() + logger.info(res) + + workunit_id = res[0]["id"] + logger.info(f"workunit_id = {workunit_id}") + + with client.with_auth(auth): + client.save( + "resource", + { + "base64": params["base64"], + "name": params["resourcename"], + "workunitid": workunit_id, + }, + ) + client.save("workunit", {"id": workunit_id, "status": "available"}) return jsonify(dict(workunit_id=workunit_id)) -@app.route('/add_dataset/', methods=['GET', 'POST']) -def add_dataset(containerid): - try: - queue_content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.'}) - - try: - obj = {} - obj['name'] = 'autogenerated dataset by http://fgcz-s-028.uzh.ch:8080/queue_generator/' - obj['containerid'] = containerid - obj['attribute'] = [ {'name':'File Name', 'position':1, 'type':'String'}, - {'name':'Path', 'position':2}, - {'name':'Position', 'position':3}, - {'name':'Inj Vol', 
'position':4, 'type':'numeric'}, - {'name':'ExtractID', 'position':5, 'type':'extract'} ] - - obj['item'] = list() - - for idx in range(0, len(queue_content)): - obj['item']\ - .append({'field': map(lambda x: {'attributeposition': x + 1, 'value': queue_content[idx][x]}, range(0, len(queue_content[idx]))), 'position': idx + 1}) - - print (obj) - - except: - return jsonify({'error': 'composing bfabric object failed.'}) - - try: - res = bfapp.save_object(endpoint='dataset', obj=obj)[0] - print ("added dataset {} to bfabric.".format(res._id)) - return (jsonify({'id':res._id})) - - except: - print(res) - return jsonify({'error': 'beaming dataset to bfabric failed.'}) - - - -# @deprecated("Use read instead") -@app.route('/user/', methods=['GET']) -def get_user(containerid): - - users = bfapp.read_object(endpoint='user', obj={'containerid': containerid}) - #not users or - if not users or len(users) == 0: - return jsonify({'error': 'no resources found.'}) - # abort(404) - - return jsonify({'user': users}) +# @app.route("/add_dataset/", methods=["GET", "POST"]) +# def add_dataset(containerid): +# try: +# queue_content = json.loads(request.data) +# except: +# return jsonify({"error": "could not get POST content."}) +# +# try: +# obj = {} +# obj["name"] = "autogenerated dataset by http://fgcz-s-028.uzh.ch:8080/queue_generator/" +# obj["containerid"] = containerid +# obj["attribute"] = [ +# {"name": "File Name", "position": 1, "type": "String"}, +# {"name": "Path", "position": 2}, +# {"name": "Position", "position": 3}, +# {"name": "Inj Vol", "position": 4, "type": "numeric"}, +# {"name": "ExtractID", "position": 5, "type": "extract"}, +# ] +# +# obj["item"] = list() +# +# for idx in range(0, len(queue_content)): +# obj["item"].append( +# { +# "field": map( +# lambda x: { +# "attributeposition": x + 1, +# "value": queue_content[idx][x], +# }, +# range(0, len(queue_content[idx])), +# ), +# "position": idx + 1, +# } +# ) +# +# print(obj) +# +# except: +# return jsonify({"error": "composing bfabric object failed."}) +# +# try: +# res = bfapp.save_object(endpoint="dataset", obj=obj)[0] +# print("added dataset {} to bfabric.".format(res._id)) +# return jsonify({"id": res._id}) +# +# except: +# print(res) +# return jsonify({"error": "beaming dataset to bfabric failed."}) + + +# @app.route("/zip_resource_of_workunitid/", methods=["GET"]) +# def get_zip_resources_of_workunit(workunitid): +# res = map( +# lambda x: x.relativepath, +# bfapp.read_object(endpoint="resource", obj={"workunitid": workunitid}), +# ) +# print(res) +# res = filter(lambda x: x.endswith(".zip"), res) +# return jsonify(res) + +# @app.route("/addworkunit", methods=["GET", "POST"]) +# def add_workunit(): +# appid = request.args.get("appid", None) +# pid = request.args.get("pid", None) +# rname = request.args.get("rname", None) +# +# try: +# content = json.loads(request.data) +# # print content +# except: +# return jsonify({"error": "could not get POST content.", "appid": appid}) +# +# resource_base64 = content["base64"] +# # base64.b64encode(content) +# print(resource_base64) +# +# return jsonify({"rv": "ok"}) -# @deprecated("Use read instead") -@app.route('/sample/', methods=['GET']) -def get_all_sample(containerid): - samples = [] - rv = list(map(lambda p: bfapp.read_object(endpoint='sample', obj={'containerid': containerid}, page=p), range(1,10))) - rv = list(map(lambda x: [] if x is None else x, rv)) - for el in rv: samples.extend(el) +def setup_logger_prod(name: str = DEFAULT_LOGGER_NAME, address: tuple[str, int] = ("fgcz-ms.uzh.ch", 
514)) -> None: + """Sets up the production logger.""" + syslog_handler = logging.handlers.SysLogHandler(address=address) + formatter = logging.Formatter("%(name)s %(message)s") + syslog_handler.setFormatter(formatter) - try: - annotationDict = {} - for annotationId in filter(lambda x: x is not None, set(map(lambda x: x.groupingvar._id if "groupingvar" in x else None, samples))): - print (annotationId) - annotation = bfapp.read_object(endpoint='annotation', obj={'id': annotationId}) - annotationDict[annotationId] = annotation[0].name - except: - pass + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + logger.addHandler(syslog_handler) + return logger - for sample in samples: - try: - sample['condition'] = annotationDict[sample.groupingvar._id] - except: - sample['condition'] = None - if len(samples) == 0: - return jsonify({'error': 'no extract found.'}) - # abort(404) +def setup_logger_debug(name: str = DEFAULT_LOGGER_NAME) -> None: + """Sets up the debug logger.""" + logger = logging.getLogger(name) + logger.setLevel(logging.DEBUG) + return logger - return jsonify({'samples': samples}) -""" -example -curl http://localhost:5000/zip_resource_of_workunitid/154547 -""" -@app.route('/zip_resource_of_workunitid/', methods=['GET']) -def get_zip_resources_of_workunit(workunitid): - res = map(lambda x: x.relativepath, bfapp.read_object(endpoint='resource', obj={'workunitid': workunitid})) - print (res) - res = filter(lambda x: x.endswith(".zip"), res) - return jsonify(res) - - -@app.route('/query', methods=['GET', 'POST']) -def query(): - try: - content = json.loads(request.data) - except: - return jsonify({'error': 'could not get POST content.', 'appid': appid}) - - print ("PASSWORD CLEARTEXT", content['webservicepassword']) - - bf = bfabric.Bfabric(login=content['login'], - password=content['webservicepassword'], - webbase='http://fgcz-bfabric.uzh.ch/bfabric') - - for i in content.keys(): - print ("{}\t{}".format(i, content[i])) - - if 'containerid' in content: - workunits = bf.read_object(endpoint='workunit', - obj={'applicationid': content['applicationid'], - 'containerid': content['containerid']}) - print (workunits) - return jsonify({'workunits': map(lambda x: x._id, workunits)}) - #elif 'query' in content and "{}".format(content['query']) is 'project': - else: - user = bf.read_object(endpoint='user', obj={'login': content['login']})[0] - projects = map(lambda x: x._id, user.project) - return jsonify({'projects': projects}) - - return jsonify({'error': 'could not process query'}) - -@app.route('/addworkunit', methods=['GET', 'POST']) -def add_workunit(): - appid = request.args.get('appid', None) - pid = request.args.get('pid', None) - rname = request.args.get('rname', None) - - try: - content = json.loads(request.data) - # print content - except: - return jsonify({'error': 'could not get POST content.', 'appid': appid}) - - resource_base64 = content['base64'] - #base64.b64encode(content) - print (resource_base64) - - return jsonify({'rv': 'ok'}) - -if __name__ == '__main__': - if exists('/etc/ssl/fgcz-host.pem') and exists('/etc/ssl/private/fgcz-host_key.pem'): - app.run(debug=False, host="0.0.0.0", port=5001, ssl_context=('/etc/ssl/fgcz-host.pem', '/etc/ssl/private/fgcz-host_key.pem')) +def main() -> None: + """Starts the server, auto-detecting production mode if SSL keys are present.""" + ssl_key_pub = Path("/etc/ssl/fgcz-host.pem") + ssl_key_priv = Path("/etc/ssl/private/fgcz-host_key.pem") + if ssl_key_pub.exists() and ssl_key_priv.exists(): + setup_logger_prod() + 
app.run( + debug=False, + host="0.0.0.0", + port=5001, + ssl_context=( + str(ssl_key_pub), + str(ssl_key_priv), + ), + ) else: + setup_logger_debug() app.run(debug=False, host="127.0.0.1", port=5000) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_list.py b/bfabric/scripts/bfabric_list.py deleted file mode 120000 index 5084efd0..00000000 --- a/bfabric/scripts/bfabric_list.py +++ /dev/null @@ -1 +0,0 @@ -bfabric_read.py \ No newline at end of file diff --git a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py index 56e01f49..55fb9502 100755 --- a/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py +++ b/bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py @@ -1,57 +1,87 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- """ Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. -Lists applications that are not available on bfabric. +Lists proteomics workunits that are not available on bfabric. Author: Christian Panse Licensed under GPL version 3 """ +from __future__ import annotations -import sys -import bfabric - +from argparse import ArgumentParser from datetime import datetime, timedelta +from typing import Any + +from rich.console import Console +from rich.table import Column, Table + +from bfabric import Bfabric, BfabricConfig + + +def render_output(workunits_by_status: dict[str, list[dict[str, Any]]], config: BfabricConfig) -> None: + """Renders the output as a table.""" + table = Table( + Column("AID", no_wrap=True), + Column("WUID", no_wrap=True), + Column("Created", no_wrap=True), + Column("Status", no_wrap=True), + Column("Created by", no_wrap=True, max_width=12), + Column("Name", no_wrap=False), + ) + + for status, workunits in workunits_by_status.items(): + workunits = [x for x in workunits if x["createdby"] not in ["gfeeder", "itfeeder"]] + status_color = { + "Pending": "yellow", + "Processing": "blue", + "Failed": "red", + }.get(status, "black") + + for wu in workunits: + app_url = f"{config.base_url}/application/show.html?id={wu['application']['id']}" + wu_url = f"{config.base_url}/workunit/show.html?id={wu['id']}&tab=details" + table.add_row( + f"[link={app_url}]A{wu['application']['id']:3}[/link]", + f"[link={wu_url}]WU{wu['id']}[/link]", + wu["created"], + f"[{status_color}]{status}[/{status_color}]", + wu["createdby"], + wu["name"], + ) + + console = Console() + console.print(table) + + +def list_not_available_proteomics_workunits(date_cutoff: datetime) -> None: + """Lists proteomics work units that are not available on bfabric.""" + client = Bfabric.from_config(verbose=True) + Console(stderr=True).print( + f"--- list not available proteomics work units created after {date_cutoff}---", + style="bright_yellow", + ) + + workunits_by_status = {} + for status in ["Pending", "Processing", "Failed"]: + workunits_by_status[status] = client.read( + endpoint="workunit", + obj={"status": status, "createdafter": date_cutoff.isoformat()}, + ).to_list_dict() + + render_output(workunits_by_status, config=client.config) + + +def main() -> None: + """Parses the command line arguments and calls `list_not_available_proteomics_workunits`.""" + parser = ArgumentParser(description="Lists proteomics work units that are not available on bfabric.") + parser.add_argument("--max-age", type=int, help="Max age of work units in days", default=14) + args = parser.parse_args() + date_cutoff = datetime.today() - 
timedelta(days=args.max_age) + list_not_available_proteomics_workunits(date_cutoff) -def print_color_msg(msg, color="93"): - msg = "\033[{color}m--- {} ---\033[0m\n".format(msg, color=color) - sys.stderr.write(msg) - -def render_output(wu): - wu = list(filter(lambda x: x.createdby not in ["gfeeder", "itfeeder"], wu)) - - cm = {"PENDING" : "\033[33mPending \033[0m", - "PROCESSING": "\033[34mProcessing\033[0m", - "FAILED" : "\033[31mFailed \033[0m"} - - for x in wu: - if x.status in cm: - statuscol = cm[x.status] - else: - statuscol = "\033[36m{} \033[0m".format(x.status) - print("A{aid:3} WU{wuid} {cdate} {status} {createdby:12} {name}" - .format(status = statuscol, - cdate = x.created, - wuid = x._id, - createdby = x.createdby, - name = x.name, - aid = x.application._id)) if __name__ == "__main__": - B = bfabric.Bfabric() - d = datetime.today() - timedelta(days=14) - - print_color_msg("list not available proteomics workunits created after {}".format(d)) - - for status in ['Pending', 'Processing', 'Failed']: - pwu = B.read_object(endpoint = 'workunit', - obj = {'status': status, 'createdafter': d}, - plain = True, - page = 1) - try: - render_output(pwu.workunit) - except: - pass + main() diff --git a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py index ba26da36..570ec630 100755 --- a/bfabric/scripts/bfabric_list_not_existing_storage_directories.py +++ b/bfabric/scripts/bfabric_list_not_existing_storage_directories.py @@ -1,6 +1,4 @@ -#!/usr/bin/python3 -# -*- coding: latin1 -*- - +#!/usr/bin/env python3 """ Copyright (C) 2020 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. @@ -12,23 +10,31 @@ http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl """ -import os +from __future__ import annotations + +from pathlib import Path + +from bfabric import Bfabric +ROOTDIR = Path("/srv/www/htdocs/") -import bfabric -B = bfabric.Bfabric() -ROOTDIR="/srv/www/htdocs/" +def list_not_existing_storage_dirs(client: Bfabric, technologyid: int = 2) -> None: + """Lists not existing storage directories for a given technologyid.""" + results = client.read(endpoint="container", obj={"technologyid": technologyid}).to_list_dict() + container_ids = sorted({x["id"] for x in results}) -def listNotExistingStorageDirs(technologyid=2): - rv = B.read_object('container', {'technologyid': technologyid}) - containerIDs = list(set(map(lambda x: x._id, rv))) + for cid in container_ids: + if not (ROOTDIR / f"p{cid}").is_dir(): + print(cid) - for cid in containerIDs: - if not os.path.isdir("{}/p{}".format(ROOTDIR, cid)): - print (cid) +def main() -> None: + """Parses CLI arguments and calls `list_not_existing_storage_dirs`.""" + client = Bfabric.from_config(verbose=True) + list_not_existing_storage_dirs(client=client, technologyid=2) + list_not_existing_storage_dirs(client=client, technologyid=4) -listNotExistingStorageDirs(technologyid=2) -listNotExistingStorageDirs(technologyid=4) +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_list_workunit_parameters.py b/bfabric/scripts/bfabric_list_workunit_parameters.py new file mode 100644 index 00000000..c15b299f --- /dev/null +++ b/bfabric/scripts/bfabric_list_workunit_parameters.py @@ -0,0 +1,107 @@ +import argparse +import json +import sys + +import polars as pl +import rich + +from bfabric import Bfabric + + +def bfabric_list_workunit_parameters(client: Bfabric, application_id: int, max_workunits: int, format: str) -> None: + """Lists the workunit parameters of 
the provided application. + :param client: The Bfabric client to use. + :param application_id: The application ID to list the workunit parameters for. + :param max_workunits: The maximum number of workunits to fetch. + :param format: The output format to use. + """ + workunits_table_full = get_workunits_table_full(application_id, client, max_workunits) + workunits_table_explode = workunits_table_full.explode("parameter").with_columns( + parameter_id=pl.col("parameter").struct[1] + ) + parameter_table_wide = get_parameter_table(client, workunits_table_explode) + + merged_result = workunits_table_full[ + ["workunit_id", "created", "createdby", "name", "container_id", "inputdataset_id", "resource_ids"] + ].join(parameter_table_wide, on="workunit_id", how="left") + + print_results(format, merged_result) + + +def get_workunits_table_full(application_id: int, client: Bfabric, max_workunits: int) -> pl.DataFrame: + """Returns a table with the workunits for the specified application.""" + # read the workunit data + workunits_table_full = ( + client.read("workunit", {"applicationid": application_id}, max_results=max_workunits) + .to_polars() + .rename({"id": "workunit_id"}) + ) + # add some extra columns flattening the structure for the output + workunits_table_full = workunits_table_full.with_columns( + container_id=pl.col("container").struct[1], + resource_ids=pl.col("resource").map_elements( + lambda x: json.dumps([xx["id"] for xx in x]), return_dtype=pl.String + ), + ) + if "inputdataset" in workunits_table_full.columns: + workunits_table_full = workunits_table_full.with_columns( + inputdataset_id=pl.col("inputdataset").struct[1], + ) + else: + workunits_table_full = workunits_table_full.with_columns( + inputdataset_id=pl.lit(None) + ) + return workunits_table_full + + +def print_results(format: str, merged_result: pl.DataFrame) -> None: + """Prints the results to the console, in the requested format.""" + if format == "tsv": + print(merged_result.write_csv(file=None, separator="\t")) + elif format == "json": + print(merged_result.write_json(file=None)) + elif format == "pretty": + # use rich + rich_table = rich.table.Table() + for column in merged_result.columns: + rich_table.add_column(column) + for row in merged_result.iter_rows(): + rich_table.add_row(*map(str, row)) + console = rich.console.Console() + console.print(rich_table) + else: + raise ValueError("Unsupported format") + + +def get_parameter_table(client: Bfabric, workunits_table_explode: pl.DataFrame) -> pl.DataFrame: + """Returns a wide format table for the specified parameters, with the key `workunit_id` indicating the source.""" + # load the parameters table + collect = [] + for i_frame, frame in enumerate(workunits_table_explode.iter_slices(100)): + print(f"-- Reading parameters chunk {i_frame + 1} of {len(workunits_table_explode) // 100 + 1}", file=sys.stderr) + chunk = ( + client.read("parameter", {"id": frame["parameter_id"].to_list()}).to_polars().rename({"id": "parameter_id"}) + ) + collect.append(chunk) + parameter_table_full = pl.concat(collect, how="align")[["parameter_id", "key", "value"]] + # add workunit id to parameter table + parameter_table_full = parameter_table_full.join( + workunits_table_explode[["workunit_id", "parameter_id"]], on="parameter_id", how="left" + ) + # convert to wide format + return parameter_table_full.pivot(values="value", index="workunit_id", columns="key") + + +def main() -> None: + """Parses command line arguments and calls `bfabric_list_workunit_parameters`.""" + client = 
Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument("application_id", type=int, help="The application ID to list the workunit parameters for.") + parser.add_argument("--max-workunits", type=int, help="The maximum number of workunits to fetch.", default=200) + parser.add_argument("--format", choices=["tsv", "json", "pretty"], default="tsv") + args = vars(parser.parse_args()) + bfabric_list_workunit_parameters(client, **args) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_logthis.py b/bfabric/scripts/bfabric_logthis.py index 0684e0b0..91a58e04 100755 --- a/bfabric/scripts/bfabric_logthis.py +++ b/bfabric/scripts/bfabric_logthis.py @@ -1,30 +1,28 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - -""" -set status of a resource of a given external job -input -""" - # Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. # # Author: # Christian Panse +from __future__ import annotations +import argparse -import sys -import bfabric +from bfabric import Bfabric -from random import randint -from time import sleep +def bfabric_logthis(client: Bfabric, external_job_id: int, message: str) -> None: + """Logs a message for an external job.""" + client.save("externaljob", {"id": external_job_id, "logthis": message}) -if __name__ == "__main__": - if len(sys.argv) > 1: - B = bfabric.BfabricFeeder() - try: - externaljobid, msg = (int(sys.argv[1]), sys.argv[2]) - except: - raise ("Usage: bfabric_logthis.py ") - rv = B.save_object('externaljob', {'id': externaljobid, 'logthis': msg}) - # B.print_json(rv) +def main() -> None: + """Parses the command line arguments and calls `bfabric_logthis`.""" + client = Bfabric.from_config() + parser = argparse.ArgumentParser(description="log message of external job") + parser.add_argument("external_job_id", type=int, help="external job id") + parser.add_argument("message", type=str, help="message") + args = vars(parser.parse_args()) + bfabric_logthis(client=client, **args) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_read.py b/bfabric/scripts/bfabric_read.py index 15bcbd30..5b22e16c 100755 --- a/bfabric/scripts/bfabric_read.py +++ b/bfabric/scripts/bfabric_read.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """B-Fabric command line reader Copyright: @@ -15,95 +13,112 @@ See also: http://fgcz-bfabric.uzh.ch/bfabric/executable?wsdl """ - -import signal -import sys +from __future__ import annotations +import argparse +import json import time -import bfabric - - -def signal_handler(signal, frame): - print('You pressed Ctrl+C!') - sys.exit(0) - -signal.signal(signal.SIGINT, signal_handler) - -def print_color_msg(msg, color = "93"): - sys.stderr.write(f"\033[{color}m--- {msg} ---\033[0m\n") - -def usage(): - print(__doc__) - print("usage:\n") - msg = f"\t{sys.argv[0]} " - print(msg) - msg = "\t{} \n\n".format(sys.argv[0]) - print(msg) - print("valid endpoints are: [{}]\n\n".format(",\n\t ".join(bfabric.endpoints))) - print("example:") - msg = "\t{} user login cpanse\n\n".format(sys.argv[0]) - print(msg) - -if __name__ == "__main__": - B = bfabric.Bfabric(verbose=False) - - sys.stderr.write(bfabric.msg) - - query_obj = {} - - try: - endpoint = sys.argv[1] - except: - usage() - sys.exit(1) - - if len(sys.argv) == 4: - attribute = sys.argv[2] - name = sys.argv[3] - query_obj[attribute] = name - - if endpoint in bfabric.endpoints: - print_color_msg(f"query = {query_obj}") - start_time = time.time() - res = 
B.read_object(endpoint = endpoint, obj = query_obj) - end_time = time.time() - - if res is None: - print_color_msg("Empty result set or invalid query.", color=95) - sys.exit(0) - - try: - res = sorted(res, key=lambda x: x._id) - except: - print_color_msg("sorting failed.") - - try: - # print json object - if len(res) < 2: - print(res[0]) - except Exception as e: - print_color_msg(f"invalid query. {e}.", color=95) - sys.exit(1) - - try: - print_color_msg("possible attributes are: {}.".format((", ".join([at[0] for at in res[0]])))) - except Exception as e: - print_color_msg(f"Exception: {e}") - - for x in res: - try: - print(f"{x._id}\t{x.createdby}\t{x.modified}\t{x.name}\t{x.groupingvar.name}") - except Exception as e: - print(f"{x._id}\t{x.createdby}\t{x.modified}") +import yaml +from typing import Any +from rich.console import Console +from rich.table import Table +import bfabric +from bfabric import Bfabric, BfabricConfig + + +def bfabric_read(client: Bfabric, endpoint: str, attribute: str | None, value: str | None, output_format: str) -> None: + """Reads one or several items from a B-Fabric endpoint and prints them.""" + if attribute is not None and value is None: + message = "value must be provided if attribute is provided" + raise ValueError(message) + + query_obj = {attribute: value} if value is not None else {} + console_info = Console(style="bright_yellow", stderr=True) + console_info.print(f"--- query = {query_obj} ---") + console_out = Console() + + start_time = time.time() + results = client.read(endpoint=endpoint, obj=query_obj) + end_time = time.time() + res = sorted(results.to_list_dict(drop_empty=False), key=lambda x: x["id"]) + if res: + possible_attributes = sorted(set(res[0].keys())) + console_info.print(f"--- possible attributes = {possible_attributes} ---") + + output_format = _determine_output_format(console_out=console_out, output_format=output_format, n_results=len(res)) + console_info.print(f"--- output format = {output_format} ---") + + if output_format == "json": + print(json.dumps(res, indent=2)) + elif output_format == "yaml": + print(yaml.dump(res)) + elif output_format == "table_tsv": + _print_table_tsv(res) + elif output_format == "table_rich": + _print_table_rich(client.config, console_out, endpoint, res) else: - print_color_msg("The first argument must be a valid endpoint.", color=95) - usage() - sys.exit(1) + raise ValueError(f"output format {output_format} not supported") + + console_info.print(f"--- number of query result items = {len(res)} ---") + console_info.print(f"--- query time = {end_time - start_time:.2f} seconds ---") + + +def _print_table_rich(config: BfabricConfig, console_out: Console, endpoint: str, res: list[dict[str, Any]]) -> None: + """Prints the results as a rich table to the console.""" + table = Table("Id", "Created By", "Modified", "Name", "Grouping Var") + for x in res: + entry_url = f"{config.base_url}/{endpoint}/show.html?id={x['id']}" + table.add_row( + f"[link={entry_url}]{x['id']}[/link]", + str(x["createdby"]), + str(x["modified"]), + str(x["name"]), + str(x.get("groupingvar", {}).get("name", "")), + ) + console_out.print(table) + + +def _print_table_tsv(res: list[dict[str, Any]]) -> None: + """Prints the results as a tab-separated table, using the original cols this script returned.""" + for x in res: + try: + print(f'{x["id"]}\t{x["createdby"]}\t{x["modified"]}\t{x["name"]}\t{x["groupingvar"]["name"]}') + except (KeyError, TypeError): + print(f'{x["id"]}\t{x["createdby"]}\t{x["modified"]}') + + +def 
_determine_output_format(console_out: Console, output_format: str, n_results: int) -> str: + """Returns the format to use, based on the number of results, and whether the output is an interactive console. + If the format is already set to a concrete value instead of "auto", it will be returned unchanged. + """ + if output_format == "auto": + if n_results < 2: + output_format = "json" + elif console_out.is_interactive: + output_format = "table_rich" + else: + output_format = "table_tsv" + return output_format + + +def main() -> None: + """Parses command line arguments and calls `bfabric_read`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument( + "--format", + help="output format", + choices=["json", "yaml", "table_tsv", "table_rich", "auto"], + default="auto", + dest="output_format", + ) + parser.add_argument("endpoint", help="endpoint to query", choices=bfabric.endpoints) + parser.add_argument("attribute", help="attribute to query for", nargs="?") + parser.add_argument("value", help="value to query for", nargs="?") + args = parser.parse_args() + bfabric_read(client=client, **vars(args)) - try: - print_color_msg(f"number of query result items = {len(res)}") - except: - pass - print_color_msg(f"query time = {round(end_time - start_time, 2)} seconds") +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_read_samples_from_dataset.py b/bfabric/scripts/bfabric_read_samples_from_dataset.py index 77b4f92b..e40d5ced 100755 --- a/bfabric/scripts/bfabric_read_samples_from_dataset.py +++ b/bfabric/scripts/bfabric_read_samples_from_dataset.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Author: Maria d'Errico @@ -16,43 +14,45 @@ Usage: bfabric_read_samples_from_dataset.py datasetid """ +import argparse +from bfabric import Bfabric -import sys -import os -import csv -import bfabric +def get_table_row(client: Bfabric, relative_path: str) -> tuple[str, int, str, str, str]: + """Returns the row of the table with the information of the resource with the given relative path.""" + resource = client.read(endpoint="resource", obj={"relativepath": relative_path}).to_list_dict()[0] + sample = client.read(endpoint="sample", obj={"id": resource["sample"]["id"]}).to_list_dict()[0] + groupingvar = (sample.get("groupingvar") or {}).get("name") or "" + return resource["workunit"]["id"], resource["id"], resource["name"], sample["name"], groupingvar -B = bfabric.Bfabric() +def bfabric_read_samples_from_dataset(dataset_id: int) -> None: + """Prints the workunit id, inputresource id, inputresource name, sample name and groupingvar name for each resource + in the dataset with the given id.""" + client = Bfabric.from_config(verbose=True) + dataset = client.read(endpoint="dataset", obj={"id": dataset_id}).to_list_dict()[0] -def read_dataset(dataset_id): - ds = B.read_object(endpoint="dataset", obj={'id': dataset_id})[0] - return ds + positions = [a["position"] for a in dataset["attribute"] if a["name"] == "Relative Path"] + if not positions: + raise ValueError(f"No 'Relative Path' attribute found in the dataset {dataset_id}") + relative_path_position = positions[0] -def get_table(relativepath): - res = B.read_object(endpoint='resource', obj={'relativepath': relativepath})[0] - sample = B.read_object(endpoint='sample', obj={'id': res.sample._id})[0] - try: - groupingvar = sample.groupingvar.name - except: - groupingvar = "" - pass - return res.workunit._id, res._id, res.name, sample.name, groupingvar + 
print("\t".join(["workunit.id", "inputresource.id", "inputresource.name", "sample.name", "groupingvar.name"])) + for item in dataset["item"]: + relative_path = [ + field["value"] for field in item["field"] if field["attributeposition"] == relative_path_position + ][0] + workunitid, resourceid, resourcename, samplename, groupingvar = get_table_row(client, relative_path) + print(f"{workunitid}\t{resourceid}\t{resourcename}\t{samplename}\t{groupingvar}") -def run(dataset_id): - ds = read_dataset(dataset_id) - attributeposition = [x.position for x in ds.attribute if x.name == "Relative Path"][0] - print ("{}\t{}\t{}\t{}\t{}".format('workunit.id', 'inputresource.id', 'inputresource.name', 'sample.name', 'groupingvar.name')) - for i in ds.item: - for x in i.field: - if hasattr(x, "value") and x.attributeposition == attributeposition: - workunitid, resourceid, resourcename, samplename, groupingvar = get_table(x.value) - print ("{}\t{}\t{}\t{}\t{}".format(workunitid, resourceid, resourcename, samplename, groupingvar)) +def main() -> None: + """Parses the command line arguments and calls the function bfabric_read_samples_from_dataset.""" + parser = argparse.ArgumentParser() + parser.add_argument("dataset_id", type=int) + args = parser.parse_args() + bfabric_read_samples_from_dataset(dataset_id=args.dataset_id) if __name__ == "__main__": - dataset_id = int(sys.argv[1]) - run(dataset_id) - + main() diff --git a/bfabric/scripts/bfabric_read_samples_of_workunit.py b/bfabric/scripts/bfabric_read_samples_of_workunit.py index 1841949f..2a69daad 100755 --- a/bfabric/scripts/bfabric_read_samples_of_workunit.py +++ b/bfabric/scripts/bfabric_read_samples_of_workunit.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2022 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
@@ -15,64 +13,42 @@
 Usage example:
 
   bfabric_read_samples_of_workunit.py 278175
 """
-
-import signal
-import sys
+import argparse
 import time
 
-import bfabric
-
-
-def signal_handler(signal, frame):
-    print('You pressed Ctrl+C!')
-    sys.exit(0)
-
-signal.signal(signal.SIGINT, signal_handler)
+from rich.console import Console
 
-def print_color_msg(msg, color="93"):
-    msg = "\033[{color}m--- {} ---\033[0m\n".format(msg, color=color)
-    sys.stderr.write(msg)
-
-def usage():
-    print("usage:\n")
-    msg = "\t{} <workunit id>".format(sys.argv[0])
-    print(msg)
-
-
-if __name__ == "__main__":
+from bfabric import Bfabric
 
-    try:
-        if len(sys.argv) == 2:
-            workunitid = sys.argv[1]
-    except:
-        raise
-
-    B = bfabric.Bfabric(verbose=False)
 
-    sys.stderr.write(bfabric.msg)
+def bfabric_read_samples_of_workunit(workunit_id: int) -> None:
+    """Reads the samples of the specified workunit and prints the results to stdout."""
+    client = Bfabric.from_config(verbose=True)
 
     start_time = time.time()
+    res_workunit = client.read(endpoint="workunit", obj={"id": workunit_id}).to_list_dict()[0]
+    input_resource_ids = [x["id"] for x in res_workunit.get("inputresource", [])]
+    input_resources = client.read(endpoint="resource", obj={"id": input_resource_ids}).to_list_dict()
+    input_resources_name = [(r["id"], r["name"]) for r in input_resources]
 
-    res = B.read_object(endpoint="workunit", obj={'id': workunitid})
-
-    inputresources = list(map(lambda x: B.read_object(endpoint="resource", obj={'id': x._id})[0], res[0].inputresource))
-
-    inputresourcesname = list(map(lambda x: (x._id, x.name), inputresources))
-
-    samples = list(map(lambda x: B.read_object(endpoint="sample", obj={'id': x.sample._id})[0], inputresources))
+    samples = client.read(endpoint="sample", obj={"id": [x["sample"]["id"] for x in input_resources]}).to_list_dict()
+    groupingvars = [(s["id"], s["name"], (s.get("groupingvar") or {}).get("name", "NA")) for s in samples]
 
+    print("\t".join(["workunit_id", "inputresource_id", "inputresource_name", "sample_name", "groupingvar_name"]))
+    for i in zip(input_resources_name, groupingvars):
+        print("\t".join([str(workunit_id), str(i[0][0]), i[0][1], i[1][1], i[1][2]]))
 
-    # no x.groupingvar.name defined
-    try:
-        groupingvars = list(map(lambda x: (x._id, x.name, x.groupingvar.name), samples))
-    except:
-        groupingvars = list(map(lambda x: (x._id, x.name, "NA"), samples))
+    end_time = time.time()
+    Console(stderr=True).print(f"--- query time = {end_time - start_time:.2f} seconds ---", style="bright_yellow")
 
-    print ("{}\t{}\t{}\t{}\t{}".format('workunit.id', 'inputresource.id', 'inputresource.name', 'sample.name', 'groupingvar.name'))
-    for i in zip(inputresourcesname, groupingvars):
-        print ("{}\t{}\t{}\t{}\t{}".format(workunitid, i[0][0], i[0][1], i[1][1], i[1][2]))
 
-    end_time = time.time()
-    print_color_msg("query time = {} seconds".format(round(end_time - start_time, 2)))
+def main() -> None:
+    """Parses the command line arguments and calls `bfabric_read_samples_of_workunit`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("workunit_id", type=int, help="workunit id")
+    args = parser.parse_args()
+    bfabric_read_samples_of_workunit(workunit_id=args.workunit_id)
 
-    sys.exit(0)
 
+if __name__ == "__main__":
+    main()
diff --git a/bfabric/scripts/bfabric_save_csv2dataset.py b/bfabric/scripts/bfabric_save_csv2dataset.py
index cc67048f..50df9e31 100755
--- a/bfabric/scripts/bfabric_save_csv2dataset.py
+++ b/bfabric/scripts/bfabric_save_csv2dataset.py
@@ -1,5 +1,4 @@
-#!/usr/bin/python3
-
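Editor's note: one caveat worth flagging in the rewritten `bfabric_read_samples_of_workunit.py` above is that the final loop pairs resources and samples purely by position. The sketch below replays that pairing with fabricated ids and names; it assumes (as the script implicitly does) that `client.read` returns the samples in the same order as the resource list that produced the ids:

```python
# Fabricated (id, name) resource tuples and (id, name, groupingvar) sample
# tuples; zip() aligns them positionally, exactly as in the script above.
input_resources_name = [(101, "a.raw"), (102, "b.raw")]
groupingvars = [(11, "sampleA", "control"), (12, "sampleB", "treated")]

print("\t".join(["workunit_id", "inputresource_id", "inputresource_name", "sample_name", "groupingvar_name"]))
for (resource_id, resource_name), (_sample_id, sample_name, grouping) in zip(input_resources_name, groupingvars):
    print(f"278175\t{resource_id}\t{resource_name}\t{sample_name}\t{grouping}")
```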
+#!/usr/bin/env python3 """ Author: Maria d'Errico @@ -27,71 +26,88 @@ Usage: bfabric_save_csv2dataset.py [-h] --csvfile CSVFILE --name NAME --containerid int [--workunitid int] """ +from __future__ import annotations + +import argparse +from pathlib import Path + +import polars as pl -import sys from bfabric import Bfabric -import csv - -def csv2json(csvFilePath): - obj = {} - obj["item"] = [] - obj["attribute"] = [] - types = {int: "Integer", str: "String", float: "Float"} - # Open the csv file in read mode and create a file object - with open(csvFilePath, encoding='utf-8') as csv_file: - # Creating the DictReader iterator - csv_reader = csv.DictReader(csv_file) - nrow = 0 - # Read individual rows of the csv file as a dictionary - for row in csv_reader: - nrow = nrow + 1 - fields = [] - for attr in range(0, len(list(row.keys()))): - if nrow == 1: - # Fill in attributes info - attr_type = type(list(row.values())[attr]) - entry = {"name": list(row.keys())[attr], "position": attr+1, - "type": types[attr_type]} - obj["attribute"].append(entry) - else: - pass - # Fill in values info - field = {"attributeposition": attr+1, - "value": list(row.values())[attr]} - fields.append(field) - item = {"field": fields, "position": nrow} - obj["item"].append(item) - return(obj) - -def main(csv_file, dataset_name, container_id, workunit_id = None): - bfapp = Bfabric() - obj = csv2json(csv_file) - obj['name'] = dataset_name - obj['containerid'] = container_id - if workunit_id is not None: - obj['workunitid'] = workunit_id + + +def polars_to_bfabric_type(dtype: pl.DataType) -> str | None: + """Returns the B-Fabric type for a given Polars data type, defaulting to String if no correspondence is found.""" + if str(dtype).startswith("Int"): + return "Integer" + elif str(dtype).startswith("String"): + return "String" + elif str(dtype).startswith("Float"): + return "Float" else: - pass - endpoint = 'dataset' - res = bfapp.save_object(endpoint=endpoint, obj=obj) - print(res[0]) + return "String" -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser(description='Create a B-Fabric dataset') - parser.add_argument('--csvfile', required=True, - help='the path to the csv file to be uploaded as dataset') - parser.add_argument('--name', required=True, - help='dataset name as a string') - parser.add_argument('--containerid', metavar='int', required=True, - help='container id') - parser.add_argument('--workunitid', metavar='int', required=False, - help='workunit id') +def polars_to_bfabric_dataset(data: pl.DataFrame) -> dict[str, list[dict[str, int | str | float]]]: + """Converts a Polars DataFrame to a B-Fabric dataset representation.""" + attributes = [ + {"name": col, "position": i + 1, "type": polars_to_bfabric_type(data[col].dtype)} + for i, col in enumerate(data.columns) + ] + items = [ + { + "field": [{"attributeposition": i_field + 1, "value": value} for i_field, value in enumerate(row)], + "position": i_row + 1, + } + for i_row, row in enumerate(data.iter_rows()) + ] + return {"attribute": attributes, "item": items} + + +def bfabric_save_csv2dataset( + client: Bfabric, + csv_file: Path, + dataset_name: str, + container_id: int, + workunit_id: int | None, + sep: str, + has_header: bool, +) -> None: + """Creates a dataset in B-Fabric from a csv file.""" + data = pl.read_csv(csv_file, separator=sep, has_header=has_header) + obj = polars_to_bfabric_dataset(data) + obj["name"] = dataset_name + obj["containerid"] = container_id + if workunit_id is not None: + obj["workunitid"] = workunit_id + 
endpoint = "dataset" + res = client.save(endpoint=endpoint, obj=obj) + print(res.to_list_dict()[0]) + + +def main() -> None: + """Parses command line arguments and calls `bfabric_save_csv2dataset`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser(description="Create a B-Fabric dataset") + parser.add_argument( + "--csvfile", required=True, help="the path to the csv file to be uploaded as dataset", type=Path + ) + parser.add_argument("--name", required=True, help="dataset name as a string") + parser.add_argument("--containerid", type=int, required=True, help="container id") + parser.add_argument("--workunitid", type=int, required=False, help="workunit id") + parser.add_argument("--sep", type=str, default=",", help="the separator to use in the csv file e.g. ',' or '\\t'") + parser.add_argument("--no-header", action="store_false", dest="has_header", help="the csv file has no header") args = parser.parse_args() - if args.workunitid is None: - main(csv_file = args.csvfile, dataset_name = args.name, container_id = args.containerid) - else: - main(csv_file = args.csvfile, dataset_name = args.name, container_id = args.containerid, - workunit_id = args.workunitid) + bfabric_save_csv2dataset( + client=client, + csv_file=args.csvfile, + dataset_name=args.name, + container_id=args.containerid, + workunit_id=args.workunitid, + sep=args.sep, + has_header=args.has_header, + ) + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_dataset2csv.py b/bfabric/scripts/bfabric_save_dataset2csv.py index a583d9a1..5d3599fb 100755 --- a/bfabric/scripts/bfabric_save_dataset2csv.py +++ b/bfabric/scripts/bfabric_save_dataset2csv.py @@ -1,5 +1,4 @@ -#!/usr/bin/python3 - +#!/usr/bin/env python3 """ Author: Maria d'Errico @@ -14,46 +13,65 @@ Usage: bfabric_save_dataset2csv.py [-h] --id DATASET_ID [--dir SCRATCHDIR] Example: bfabric_save_dataset2csv.py --id 32335 && cat dataset.csv """ +from __future__ import annotations + +import argparse +from pathlib import Path + +import polars as pl -import sys from bfabric import Bfabric -def dataset2csv(ds, outputfile, sep=","): - # ds.attribute contains the list of columns name - with open(outputfile, "w") as f: - f.write("{}\n".format(sep.join(map(lambda x: x.name, ds.attribute)))) - for i in ds.item: - # sort values based on the columns order in attributeposition - for x in i.field: - if not hasattr(x,"value") or x.value == None: - x.value = '' - fields = [(x.value, x.attributeposition) for x in i.field] - fields.sort(key=lambda y: int(y[1])) - f.write("{}\n".format(sep.join([t[0] for t in fields]))) - - -def main(dataset_id, scratchdir): - bfapp = Bfabric() - try: - query_obj = {'id': dataset_id} - ds = bfapp.read_object(endpoint='dataset', obj=query_obj)[0] - except: - print("No input dataset found") - raise +def dataset2csv(dataset: dict, output_path: Path, sep: str) -> None: + """Writes the `dataset` content to csv file at `output_path`.""" + column_names = [x["name"] for x in dataset["attribute"]] + data = [] + for item in dataset["item"]: + row_values = [x.get("value") for x in item["field"]] + data.append(dict(zip(column_names, row_values))) + df = pl.DataFrame(data) + df.write_csv(output_path, separator=sep) + + +def bfabric_save_dataset2csv(client: Bfabric, dataset_id: int, out_dir: Path, out_filename: Path, sep: str) -> None: + """Saves the dataset with id `dataset_id` to a csv file at `out_dir/out_filename` or `out_filename` if it's an + absolute path. 
+ """ + results = client.read(endpoint="dataset", obj={"id": dataset_id}).to_list_dict() + if not results: + raise RuntimeError(f"No dataset found with id '{dataset_id}'") + dataset = results[0] + output_path = out_dir / out_filename try: - dataset2csv(ds, "{}/dataset.csv".format(scratchdir)) - except: - print("The writing process to '{}'/dataset.csv failed.".format(scratchdir)) + dataset2csv(dataset, output_path=output_path, sep=sep) + except Exception: + print(f"The writing process to '{output_path}' failed.") raise -if __name__ == "__main__": - import argparse - parser = argparse.ArgumentParser(description='Save a B-Fabric dataset to a csv file') - parser.add_argument('--id', metavar='int', required=True, - help='dataset id') - parser.add_argument('--dir', required=False, default='./', - help='the path to the directory where to save the csv file') +def main() -> None: + """Parses arguments and calls `bfabric_save_dataset2csv`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser(description="Save a B-Fabric dataset to a csv file") + parser.add_argument("--id", metavar="int", required=True, help="dataset id", type=int) + parser.add_argument( + "--dir", + type=Path, + default=".", + help="the path to the directory where to save the csv file", + ) + parser.add_argument( + "--file", + default="dataset.csv", + help="the name of the csv file to save the dataset content", + ) + parser.add_argument("--sep", default=",", help="the separator to use in the csv file e.g. ',' or '\\t'") args = parser.parse_args() - main(scratchdir = args.dir, dataset_id = args.id) + bfabric_save_dataset2csv( + client=client, out_dir=args.dir, out_filename=args.file, dataset_id=args.id, sep=args.sep + ) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_fasta.py b/bfabric/scripts/bfabric_save_fasta.py index 3ebbb15c..4e2da311 100755 --- a/bfabric/scripts/bfabric_save_fasta.py +++ b/bfabric/scripts/bfabric_save_fasta.py @@ -1,74 +1,81 @@ -#!/usr/bin/python +#!/usr/bin/env python3 -import sys -import os -import yaml -#import xmlrpclib +# TODO this file was refactored without testing anything +# TODO this file was refactored without testing anything +import argparse import hashlib -from optparse import OptionParser +import json +import sys +from pathlib import Path + from bfabric import Bfabric -FASTAHTTPROOT="/fasta/" +FASTAHTTPROOT = "/fasta/" BFABRICSTORAGEID = 2 -BFABRICAPPLIATIONID = 61 - -def save_fasta(containerid=1875, fasta_file="p1875_db10_20170817.fasta"): - bfapp = Bfabric() - - try: - print("reading stdin") - description = sys.stdin.read() - except: - print("reading from stdin failed.") - raise - - try: - md5 = hashlib.md5(open(fasta_file, 'rb').read()).hexdigest() - except: - print("computing file checksum failed.") - raise - - resource = bfapp.read_object(endpoint='resource', obj={'filechecksum': md5}) - - try: - print("resource(s) already exist.".format(resource[0]._id)) - resource = bfapp.save_object(endpoint='resource', obj={'id': resource[0]._id, 'description': description}) - print(resource) - return - except: - pass - - - try: - workunit = bfapp.save_object(endpoint='workunit', - obj={'name': "FASTA: {}".format(os.path.basename(fasta_file)), - 'containerid': containerid, - 'applicationid': BFABRICAPPLIATIONID}) - print (workunit) - except: - raise - - - obj = {'workunitid': workunit[0]._id, - 'filechecksum': md5, - 'relativepath': "{}{}".format(FASTAHTTPROOT, os.path.basename(fasta_file)), - 'name': os.path.basename(fasta_file), - 'size': 
os.path.getsize(fasta_file),
-            'status': 'available',
-            'description': description,
-            'storageid': BFABRICSTORAGEID
-            }
-
-
-    resource = bfapp.save_object(endpoint='resource', obj=obj)
-    print(resource)
-
-    workunit = bfapp.save_object(endpoint='workunit',
-        obj={'id': workunit[0]._id, 'status': 'available'})
-    print (workunit)
-
-if __name__ == "__main__":
-    save_fasta(containerid=sys.argv[1], fasta_file=sys.argv[2])
+BFABRIC_APPLICATION_ID = 61
+
+
+def save_fasta(container_id: int, fasta_file: Path) -> None:
+    """Save a fasta file to bfabric."""
+    client = Bfabric.from_config(verbose=True)
+
+    print("Reading description from stdin")
+    description = sys.stdin.read()
+
+    if not fasta_file.exists():
+        raise FileNotFoundError(fasta_file)
+
+    with fasta_file.open("rb") as f:
+        md5 = hashlib.md5(f.read()).hexdigest()
+
+    resources = client.read(endpoint="resource", obj={"filechecksum": md5}).to_list_dict()
+    if resources:
+        print("resource(s) already exist.")
+        # TODO this logic was mostly carried over from before, does it still make sense?
+        try:
+            resources = client.save(endpoint="resource", obj={"id": resources[0]["id"], "description": description})
+            print(json.dumps(resources.to_list_dict(), indent=2))
+            return
+        except Exception:
+            pass
+
+    workunit = client.save(
+        endpoint="workunit",
+        obj={
+            "name": f"FASTA: {fasta_file.name}",
+            "containerid": container_id,
+            # TODO make configurable if needed in the future
+            "applicationid": BFABRIC_APPLICATION_ID,
+        },
+    ).to_list_dict()
+    print(json.dumps(workunit, indent=2))
+
+    obj = {
+        "workunitid": workunit[0]["id"],
+        "filechecksum": md5,
+        "relativepath": f"{FASTAHTTPROOT}{fasta_file.name}",
+        "name": fasta_file.name,
+        "size": fasta_file.stat().st_size,
+        "status": "available",
+        "description": description,
+        "storageid": BFABRICSTORAGEID,
+    }
+
+    resource = client.save(endpoint="resource", obj=obj).to_list_dict()
+    print(json.dumps(resource, indent=2))
+
+    workunit = client.save(endpoint="workunit", obj={"id": workunit[0]["id"], "status": "available"}).to_list_dict()
+    print(json.dumps(workunit, indent=2))
+
+
+def main() -> None:
+    """Parses command line arguments and calls `save_fasta`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("container_id", help="container_id", type=int)
+    parser.add_argument("fasta_file", help="fasta_file", type=Path)
+    args = parser.parse_args()
+    save_fasta(container_id=args.container_id, fasta_file=args.fasta_file)
 
-    #p#rint (workunit)
+if __name__ == "__main__":
+    main()
diff --git a/bfabric/scripts/bfabric_save_importresource_sample.py b/bfabric/scripts/bfabric_save_importresource_sample.py
index 12611806..fa4eccc4 100755
--- a/bfabric/scripts/bfabric_save_importresource_sample.py
+++ b/bfabric/scripts/bfabric_save_importresource_sample.py
@@ -1,6 +1,5 @@
-#!/usr/bin/python3
-# -*- coding: latin1 -*-
-
+#!/usr/bin/env python3
+# TODO add integration test (with and without sample id)
 """General Importresource Feeder for bfabric
 
 Author:
     Christian Panse <cp@fgcz.ethz.ch>
 
 History:
     The first version of the script appeared on Wed Oct 24 17:02:04 CEST 2012.
 """
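Editor's note: the checksum-first flow in `bfabric_save_fasta.py` above (hash the file, look for an existing resource by checksum, and only create a new workunit if nothing matches) can be exercised in isolation. A minimal sketch follows; the file name and sequence are invented for illustration:

```python
# Standalone sketch of the md5-dedup step used by save_fasta() above.
import hashlib
from pathlib import Path

fasta_file = Path("example_db.fasta")  # invented path, for illustration only
fasta_file.write_text(">sp|P00000|EXAMPLE test entry\nMKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ\n")

with fasta_file.open("rb") as f:
    md5 = hashlib.md5(f.read()).hexdigest()

# save_fasta() would now call client.read(endpoint="resource",
# obj={"filechecksum": md5}) and, on a hit, only update the description
# instead of creating a new workunit/resource pair.
print(md5)
```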
""" +from __future__ import annotations - - +import logging +import logging.handlers import os import re -import time import sys -from bfabric import Bfabric - - -import logging, logging.handlers - -logger = logging.getLogger('sync_feeder') -hdlr_syslog = logging.handlers.SysLogHandler(address=("130.60.81.21", 514)) -formatter = logging.Formatter('%(name)s %(message)s', datefmt="%Y-%m-%d %H:%M:%S") -hdlr_syslog.setFormatter(formatter) -logger.addHandler(hdlr_syslog) -logger.setLevel(logging.INFO) +import time +import json +from bfabric import Bfabric -################################################################################ -bfabric_storageid = 2 -bfapp = Bfabric() +BFABRIC_STORAGE_ID = 2 -# maps the 'real world' to the BFabric application._id -if bfapp.config.application_ids is None: - raise RuntimeError("No bfapp.config.application_ids variable configured. check '~/.bfabricrc.py' file!") -print(bfapp.config.application_ids) -bfabric_application_ids = bfapp.config.application_ids -def save_importresource(line): - """ reads, splits and submit the input line to the bfabric system +def save_importresource(client: Bfabric, line: str) -> None: + """reads, splits and submit the input line to the bfabric system Input: a line containg md5sum;date;size;path @@ -57,70 +41,98 @@ def save_importresource(line): Output: True on success otherwise an exception raise """ + md5_checksum, file_date, file_size, file_path = line.split(";") + + # Format the timestamp for bfabric + file_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(file_date))) + + bfabric_application_ids = client.config.application_ids + if not bfabric_application_ids: + raise RuntimeError("No bfabric_application_ids configured. check '~/.bfabricpy.yml' file!") + + bfabric_application_id, bfabric_projectid = get_bfabric_application_and_project_id( + bfabric_application_ids, file_path + ) + + obj = { + "applicationid": bfabric_application_id, + "filechecksum": md5_checksum, + "containerid": bfabric_projectid, + "filedate": file_date, + "relativepath": file_path, + "name": os.path.basename(file_path), + "size": file_size, + "storageid": BFABRIC_STORAGE_ID, + } + + match = re.search( + r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", + file_path, + ) + if match: + print(f"found sampleid={match.group(3)} pattern") + obj["sampleid"] = int(match.group(3)) + + print(obj) + res = client.save(endpoint="importresource", obj=obj) + print(json.dumps(res, indent=2)) + + +def get_sample_id_from_path(file_path: str) -> int | None: + """Returns the sample id for a given file path, if it's present in the correct format.""" + match = re.search( + r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", + file_path, + ) + if match: + print(f"found sampleid={match.group(3)} pattern") + return int(match.group(3)) + else: + return None - _bfabric_applicationid = -1 - _bfabric_projectid = -1, - _file_size = -1 - _file_date = -1 - - # empty string / file - _md5 = "d41d8cd98f00b204e9800998ecf8427e" - - _sampleid = None - - try: - (_md5, _file_date, _file_size, _file_path) = line.split(";") - except: - raise - - - - # the timeformat bfabric understands - #_file_date = time.strftime("%FT%H:%M:%S-01:00",time.gmtime(int(_file_date))) - _file_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(_file_date))) +def get_bfabric_application_and_project_id(bfabric_application_ids: dict[str, int], file_path: str) -> tuple[int, int]: + """Returns the bfabric 
+def get_sample_id_from_path(file_path: str) -> int | None:
+    """Returns the sample id for a given file path, if it's present in the correct format."""
+    match = re.search(
+        r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$",
+        file_path,
+    )
+    if match:
+        print(f"found sampleid={match.group(3)} pattern")
+        return int(match.group(3))
+    else:
+        return None
 
-    _bfabric_applicationid = -1
-    _bfabric_projectid = -1,
-    _file_size = -1
-    _file_date = -1
-
-    # empty string / file
-    _md5 = "d41d8cd98f00b204e9800998ecf8427e"
-
-    _sampleid = None
-
-    try:
-        (_md5, _file_date, _file_size, _file_path) = line.split(";")
-    except:
-        raise
-
-
-
-    # the timeformat bfabric understands
-    #_file_date = time.strftime("%FT%H:%M:%S-01:00",time.gmtime(int(_file_date)))
-    _file_date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(int(_file_date)))
 
+def get_bfabric_application_and_project_id(bfabric_application_ids: dict[str, int], file_path: str) -> tuple[int, int]:
+    """Returns the bfabric application id and project id for a given file path."""
     # linear search through dictionary. first hit counts!
-    for i in bfabric_application_ids.keys():
+    bfabric_applicationid = -1
+    bfabric_projectid = -1
+    for i in bfabric_application_ids:
         # first match counts!
-        if re.search(i, _file_path):
-            _bfabric_applicationid = bfabric_application_ids[i]
-            re_result = re.search(r"^p([0-9]+)\/.+", _file_path)
-            _bfabric_projectid = re_result.group(1)
+        if re.search(i, file_path):
+            bfabric_applicationid = bfabric_application_ids[i]
+            re_result = re.search(r"^p([0-9]+)\/.+", file_path)
+            bfabric_projectid = int(re_result.group(1))
             break
-
-    if _bfabric_applicationid < 0:
-        logger.error("{0}; no bfabric application id.".format(_file_path))
-        return
-
-    obj = { 'applicationid':_bfabric_applicationid,
-            'filechecksum':_md5,
-            'containerid':_bfabric_projectid,
-            'filedate':_file_date,
-            'relativepath':_file_path,
-            'name': os.path.basename(_file_path),
-            'size':_file_size,
-            'storageid': bfabric_storageid
-            }
-
-    try:
-        m = re.search(r"p([0-9]+)\/(Proteomics\/[A-Z]+_[1-9])\/.*_\d\d\d_S([0-9][0-9][0-9][0-9][0-9][0-9]+)_.*(raw|zip)$", _file_path)
-        print ("found sampleid={} pattern".format(m.group(3)))
-        obj['sampleid'] = int(m.group(3))
-    except:
-        pass
-
-
-    print (obj)
-    res = bfapp.save_object(endpoint='importresource', obj=obj)
-    print (res[0])
-
-if __name__ == "__main__":
-    if sys.argv[1] == '-':
-        print ("reading from stdin ...")
+    if bfabric_applicationid < 0:
+        logger = logging.getLogger("sync_feeder")
+        logger.error(f"{file_path}; no bfabric application id.")
+        raise RuntimeError("no bfabric application id.")
+    return bfabric_applicationid, bfabric_projectid
+
+
+def setup_logger() -> None:
+    """Sets up a logger for the script."""
+    logger = logging.getLogger("sync_feeder")
+    hdlr_syslog = logging.handlers.SysLogHandler(address=("130.60.81.21", 514))
+    formatter = logging.Formatter("%(name)s %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+    hdlr_syslog.setFormatter(formatter)
+    logger.addHandler(hdlr_syslog)
+    logger.setLevel(logging.INFO)
+
+
+def main() -> None:
+    """Parses arguments and calls `save_importresource`."""
+    setup_logger()
+    client = Bfabric.from_config(verbose=True)
+    if sys.argv[1] == "-":
+        print("reading from stdin ...")
         for input_line in sys.stdin:
-            save_importresource(input_line.rstrip())
-    elif sys.argv[1] == '-h':
+            save_importresource(client, input_line.rstrip())
+    elif sys.argv[1] == "-h":
         print(__doc__)
     else:
-        save_importresource(sys.argv[1])
+        save_importresource(client, sys.argv[1])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bfabric/scripts/bfabric_save_link_to_workunit.py b/bfabric/scripts/bfabric_save_link_to_workunit.py
index 53b58504..2dd7a22c 100755
--- a/bfabric/scripts/bfabric_save_link_to_workunit.py
+++ b/bfabric/scripts/bfabric_save_link_to_workunit.py
@@ -1,32 +1,33 @@
 #!/usr/bin/env python3
-# -*- coding: latin1 -*-
-
 """
 Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
Christian Panse 20231011 """ +import argparse +import json -import sys -import os from bfabric import Bfabric -def save_link(wuid=294156, link="", name=""): - B = Bfabric() - rv = B.save_object('link', - obj={'name': name, - 'parentclassname': 'workunit', - 'parentid': wuid, - 'url': link}) - B.print_json(rv) +def save_link(workunit_id: int, url: str, name: str) -> None: + """Saves a link to a workunit.""" + client = Bfabric.from_config(verbose=True) + results = client.save( + endpoint="link", obj={"name": name, "parentclassname": "workunit", "parentid": workunit_id, "url": url} + ).to_list_dict() + print(json.dumps(results[0], indent=2)) -if __name__ == "__main__": - if len(sys.argv) == 4: - save_link(wuid=sys.argv[1], link=sys.argv[2], name=sys.argv[3]) - else: - print ("Usage:") - print ("{} ".format(sys.argv[0])) - print ("Example:") - print ("{} 294156 'https://fgcz-shiny.uzh.ch/exploreDE_prot/?data=p3000/bfabric/Proteomics/SummarizedExperiment/2023/2023-09/2023-09-29/workunit_294156/2363303.rds' 'demo1 link'".format(sys.argv[0])) +def main() -> None: + """Parses the command line arguments and calls `save_link`.""" + parser = argparse.ArgumentParser() + parser.add_argument("workunit_id", type=int, help="the workunit ID") + parser.add_argument("link", type=str, help="the url to save") + parser.add_argument("name", type=str, help="the name of the link") + args = parser.parse_args() + save_link(workunit_id=args.workunit_id, url=args.link, name=args.name) + + +if __name__ == "__main__": + main() diff --git a/bfabric/scripts/bfabric_save_qcloud2_annotation.py b/bfabric/scripts/bfabric_save_qcloud2_annotation.py deleted file mode 100755 index a38d68f9..00000000 --- a/bfabric/scripts/bfabric_save_qcloud2_annotation.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: latin1 -*- -import sys -import bfabric -import json - -if __name__ == "__main__": - B = bfabric.Bfabric(verbose=False) - obj = {} - obj['name'] = 'qcloud2 annotaion test dataset by CP' - obj['containerid'] = 3000 - obj['attribute'] = [ - {'name': 'user_date', 'position':1}, - {'name': 'user_email', 'position':2}, - {'name': 'additional_information', 'position':3}, - {'name': 'problems', 'position':4}, - {'name': 'actions', 'position':5} - ] - obj['item'] = [] - - with open('LUMOS_2.json') as json_file: - d = json.load(json_file) - - for i in range(len(d)): - try: - problems = " | ".join([ "{} ({})".format(j['name'], j['qccv']) for j in d[i]['problems'] ]) - except: - problems = '-' - - try: - actions = " | ".join([ "{} ({})".format(j['name'], j['qccv']) for j in d[i]['actions'] ]) - except: - actions = '-' - - it = {'field':[ - {'value': d[i]['user_date'], 'attributeposition':1}, - {'value': d[i]['user_email'], 'attributeposition':2}, - {'value': d[i]['additional_information'], 'attributeposition':3}, - {'value': problems, 'attributeposition':4}, - {'value': actions, 'attributeposition':5} - ], 'position': i + 1} - obj['item'].append(it) - print(obj) - #res = B.save_object(endpoint='dataset', obj=obj) - #print (res[0]) - -""" -curl --location --request GET 'https://api.qcloud2.crg.eu/annotations?start_date=2019-04-01&end_date=2021-10-03&labsystem_name=LUMOS_2' --header "Authorization: Bearer ${ACCESSTOKEN}" > LUMOS_2.json -""" diff --git a/bfabric/scripts/bfabric_save_workflowstep.py b/bfabric/scripts/bfabric_save_workflowstep.py index d90cc466..81e20fe0 100755 --- a/bfabric/scripts/bfabric_save_workflowstep.py +++ b/bfabric/scripts/bfabric_save_workflowstep.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 
+#!/usr/bin/env python3
 """
 Author:
@@ -15,58 +15,71 @@
 Usage: bfabric_save_workflowstep.py 285507
 """
+from __future__ import annotations
 
-import sys
-from bfabric import Bfabric
+import argparse
 
+from bfabric import Bfabric
 
-def main(workunit_id = None):
-    B = Bfabric()
-    workflowtemplatestep_ids = {224: 247, # MaxQuant
-                                #295: 248, # FragPipe-RESOURCE
-                                314: 254, # DIANN
-                                255: 256, # maxquant_scaffold
-                                266: 258 # MaxQuant-sampleSizeEstimation
-                                }
-    workflowtemplate_ids = {224: 59, # Proteomics Data analysis
-                            #295: 59,
-                            314: 59,
-                            255: 60, # Proteomics Results
-                            266: 60
-                            }
+def save_workflowstep(workunit_id: int | None = None) -> None:
+    """Creates an analysis workflow step for a given workunit id."""
+    client = Bfabric.from_config(verbose=True)
+    workflowtemplatestep_ids = {
+        224: 247,  # MaxQuant
+        # 295: 248,  # FragPipe-RESOURCE
+        314: 254,  # DIANN
+        255: 256,  # maxquant_scaffold
+        266: 258,  # MaxQuant-sampleSizeEstimation
+    }
+    workflowtemplate_ids = {
+        224: 59,  # Proteomics Data analysis
+        # 295: 59,
+        314: 59,
+        255: 60,  # Proteomics Results
+        266: 60,
+    }
 
-    workunit = B.read_object("workunit", obj={"id": workunit_id})[0]
-    application_id = workunit["application"]["_id"]
-    container_id = workunit["container"]["_id"]
+    workunit = client.read("workunit", obj={"id": workunit_id}).to_list_dict()[0]
+    application_id = workunit["application"]["id"]
+    container_id = workunit["container"]["id"]
 
     if application_id in workflowtemplatestep_ids and application_id in workflowtemplate_ids:
-        workflows = B.read_object("workflow", obj={"containerid": container_id})
+        workflows = client.read("workflow", obj={"containerid": container_id}).to_list_dict()
         # if workflows is None, no workflow is available -> create a new one
         daw_id = -1
-        if workflows is not None:
+        if workflows:
             # check if the corresponding workflow exists (template id 59)
             for item in workflows:
-                if item["workflowtemplate"]["_id"] == workflowtemplate_ids[application_id]:
-                    daw_id = item["_id"]
+                if item["workflowtemplate"]["id"] == workflowtemplate_ids[application_id]:
+                    daw_id = item["id"]
                     break
-        else:
-            pass # case when no workflows are available (workflows == None)
 
         if daw_id == -1:
-            daw = B.save_object("workflow", obj={"containerid": container_id, "workflowtemplateid": workflowtemplate_ids[application_id]})
-            daw_id = daw[0]["_id"]
+            daw = client.save(
+                "workflow",
+                obj={"containerid": container_id, "workflowtemplateid": workflowtemplate_ids[application_id]},
+            )
+            daw_id = daw.to_list_dict()[0]["id"]
 
-        res = B.save_object("workflowstep", obj = {"workflowid": daw_id, "workflowtemplatestepid": workflowtemplatestep_ids[application_id], "workunitid": workunit_id})
+        res = client.save(
+            "workflowstep",
+            obj={
+                "workflowid": daw_id,
+                "workflowtemplatestepid": workflowtemplatestep_ids[application_id],
+                "workunitid": workunit_id,
+            },
+        ).to_list_dict()
         print(res[0])
-    else:
-        pass
 
-if __name__ == "__main__":
-    import argparse
-    parser = argparse.ArgumentParser(description='Create an analysis workflow step')
-    parser.add_argument('workunitid', metavar='workunitid', type=int,
-            help='workunit id')
+
+def main() -> None:
+    """Parses command line args and calls `save_workflowstep`."""
+    parser = argparse.ArgumentParser(description="Create an analysis workflow step")
+    parser.add_argument("workunitid", metavar="workunitid", type=int, help="workunit id")
     args = parser.parse_args()
-    main(workunit_id = args.workunitid)
+    save_workflowstep(workunit_id=args.workunitid)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/bfabric/scripts/bfabric_save_workunit.py b/bfabric/scripts/bfabric_save_workunit.py
deleted file mode 100755
index 0c66369b..00000000
--- 
a/bfabric/scripts/bfabric_save_workunit.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - -""" - -Copyright (C) 2016 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. - -Author: - Christian Panse - -Licensed under GPL version 3 - -$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_save_workunit.py $ -$Id: bfabric_save_workunit.py 2956 2017-08-09 07:14:59Z cpanse $ - -""" - -import os -import sys -import bfabric -import datetime - - -if __name__ == "__main__": - bfapp = bfabric.Bfabric() - - - workunit = bfapp.save_object(endpoint='workunit', obj={'name': 'MaxQuant report', 'projectid': '1000', 'applicationid': 217, 'status': 'available'}) - print (workunit) diff --git a/bfabric/scripts/bfabric_save_workunit_attribute.py b/bfabric/scripts/bfabric_save_workunit_attribute.py index 481a30c3..e0a4b7fa 100755 --- a/bfabric/scripts/bfabric_save_workunit_attribute.py +++ b/bfabric/scripts/bfabric_save_workunit_attribute.py @@ -1,6 +1,4 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - +#!/usr/bin/env python3 """ Copyright (C) 2021 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. @@ -12,32 +10,28 @@ """ +import argparse +import json + +from bfabric import Bfabric + + +def bfabric_save_workunit_attribute(workunit_id: int, attribute: str, value: str) -> None: + """Sets the specified attribute to the specified value for the specified workunit.""" + client = Bfabric.from_config(verbose=True) + result = client.save(endpoint="workunit", obj={"id": workunit_id, attribute: value}).to_list_dict() + print(json.dumps(result[0], indent=2)) + -import os -import sys -import bfabric -import datetime +def main() -> None: + """Parses the command line arguments and calls `bfabric_save_workunit_attribute`.""" + parser = argparse.ArgumentParser() + parser.add_argument("workunit_id", type=int, help="the workunit ID") + parser.add_argument("attribute", type=str, help="the attribute to save") + parser.add_argument("value", type=str, help="the value to save") + args = vars(parser.parse_args()) + bfabric_save_workunit_attribute(**args) -def usage(): - print("usage:\n") - msg = "\t{} ".format(sys.argv[0]) - print(msg) if __name__ == "__main__": - B = bfabric.Bfabric() - - query_obj = {} - - try: - workunitID = sys.argv[1] - attribute = sys.argv[2] - value = sys.argv[3] - query_obj["id"] = workunitID - query_obj[attribute] = value - except: - usage() - sys.exit(1) - - - res = B.save_object(endpoint='workunit', obj=query_obj) - print(res) + main() diff --git a/bfabric/scripts/bfabric_setExternalJobStatus_done.py b/bfabric/scripts/bfabric_setExternalJobStatus_done.py index 73d0581c..5e81f3d5 100755 --- a/bfabric/scripts/bfabric_setExternalJobStatus_done.py +++ b/bfabric/scripts/bfabric_setExternalJobStatus_done.py @@ -1,9 +1,12 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ set status of a resource of a given resource id """ +from __future__ import annotations +import argparse + +from bfabric import Bfabric + # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
# @@ -14,20 +17,27 @@ # # Licensed under GPL version 3 # -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_setExternalJobStatus_done.py $ -# $Id: bfabric_setExternalJobStatus_done.py 2996 2017-08-18 12:11:17Z cpanse $ -import sys -import bfabric + +def set_external_job_status_done(client: Bfabric, external_job_id: list[int]) -> None: + """Sets the status of the specified external jobs to 'done'.""" + for job_id in external_job_id: + try: + res = client.save("externaljob", {"id": job_id, "status": "done"}).to_list_dict() + print(res) + except Exception: + print(f"failed to set externaljob with id={job_id} 'available'.") + raise + + +def main() -> None: + """Parses command line arguments and calls `set_external_job_status_done`.""" + parser = argparse.ArgumentParser(description="set external job status to 'done'") + parser.add_argument("external_job_id", type=int, help="external job id", nargs="+") + args = parser.parse_args() + client = Bfabric.from_config(verbose=True) + set_external_job_status_done(client, args.external_job_id) + if __name__ == "__main__": - bfapp = bfabric.BfabricFeeder() - - if len(sys.argv) > 1: - for i in range(1, len(sys.argv)): - try: - res = bfapp.save_object('externaljob', {'id':int(sys.argv[i]), 'status':'done'}) - print(res) - except: - print("failed to set externaljob with id={} 'available'.".format(int(sys.argv[i]))) - raise + main() diff --git a/bfabric/scripts/bfabric_setResourceStatus_available.py b/bfabric/scripts/bfabric_setResourceStatus_available.py index f72c94ed..45c678c8 100755 --- a/bfabric/scripts/bfabric_setResourceStatus_available.py +++ b/bfabric/scripts/bfabric_setResourceStatus_available.py @@ -1,9 +1,13 @@ -#!/usr/bin/env python3 -# -*- coding: latin1 -*- - +#!/usr/bin/env python3 """ set status of a resource of a given resource id """ +from __future__ import annotations + +import argparse + +from bfabric import Bfabric + # Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
 #
@@ -13,27 +17,27 @@
 #
 # Licensed under GPL version 3
 #
-# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/fgcz_bfabric_setResourceStatus_available.py $
-# $Id: fgcz_bfabric_setResourceStatus_available.py 2397 2016-09-06 07:04:35Z cpanse $
 
-import sys
-import bfabric
-from random import randint
-from time import sleep
+
+def set_resource_status_available(client: Bfabric, resource_ids: list[int]) -> None:
+    """Sets the status of the specified resources to 'available'."""
+    for resource_id in resource_ids:
+        try:
+            res = client.save("resource", {"id": resource_id, "status": "available"}).to_list_dict()
+            print(res)
+        except Exception:
+            print(f"failed to set resourceid {resource_id} 'available'.")
+            raise
+
+
+def main() -> None:
+    """Parses command line arguments and calls `set_resource_status_available`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("resource_id", type=int, help="resource id", nargs="+")
+    args = parser.parse_args()
+    client = Bfabric.from_config(verbose=True)
+    set_resource_status_available(client, args.resource_id)
 
 if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        bfapp = bfabric.BfabricFeeder()
-
-        for i in range(1, len(sys.argv)):
-            sleep(randint(2, 20))
-            try:
-                print(bfapp.report_resource(resourceid=int(sys.argv[i])))
-            except:
-                print( "failed to set resourceid {} 'available'.".format(int(sys.argv[i])))
-                raise
-    else:
-        print("Invalid argument: no resourceid is provided")
-        sys.exit(0)
+    main()
diff --git a/bfabric/scripts/bfabric_setWorkunitStatus.py b/bfabric/scripts/bfabric_setWorkunitStatus.py
new file mode 100755
index 00000000..719f33b9
--- /dev/null
+++ b/bfabric/scripts/bfabric_setWorkunitStatus.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+import argparse
+import json
+
+from bfabric import Bfabric
+
+
+# Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
+#
+# Authors:
+#   Marco Schmidt
+#   Christian Panse
+#
+# Licensed under GPL version 3
+#
+
+
+def main_generic(result_status: str) -> None:
+    """Main function for setting workunit status to `result_status`."""
+    parser = argparse.ArgumentParser(description=f"Sets workunit status to '{result_status}'")
+    parser.add_argument("workunit_id", type=int, help="workunit id")
+    args = parser.parse_args()
+    client = Bfabric.from_config(verbose=True)
+    res = client.save("workunit", {"id": args.workunit_id, "status": result_status})
+    print(json.dumps(res.to_list_dict(), indent=2))
+
+
+def main_available() -> None:
+    """Calls `main_generic` with 'available' as argument."""
+    main_generic("available")
+
+
+def main_failed() -> None:
+    """Calls `main_generic` with 'failed' as argument."""
+    main_generic("failed")
+
+
+def main_processing() -> None:
+    """Calls `main_generic` with 'processing' as argument."""
+    main_generic("processing")
diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_available.py b/bfabric/scripts/bfabric_setWorkunitStatus_available.py
deleted file mode 100755
index e1f9d67d..00000000
--- a/bfabric/scripts/bfabric_setWorkunitStatus_available.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/python
-# -*- coding: latin1 -*-
-
-"""
-set status of a resource of a given resource id
-"""
-
-# Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
-# -# Authors: -# Marco Schmidt -# Christian Panse -# -# Licensed under GPL version 3 -# -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/fgcz_bfabric_setResourceStatus_available.py $ -# $Id: fgcz_bfabric_setResourceStatus_available.py 2397 2016-09-06 07:04:35Z cpanse $ - -import sys -import bfabric - -from random import randint -from time import sleep - - -if __name__ == "__main__": - if len(sys.argv) > 1: - B = bfabric.BfabricFeeder() - - res = B.save_object(endpoint='workunit', obj={'id': int(sys.argv[1]), 'status': 'available'}) - B.print_json(res) diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_failed.py b/bfabric/scripts/bfabric_setWorkunitStatus_failed.py deleted file mode 100755 index c71e6ab0..00000000 --- a/bfabric/scripts/bfabric_setWorkunitStatus_failed.py +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - -""" -set status of a resource of a given resource id -""" - -# Copyright (C) 2021 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. -# -# Authors: -# Christian Panse -# Maria - -# 2021-02-02 - -import sys -import bfabric - -from random import randint -from time import sleep - - -if __name__ == "__main__": - if len(sys.argv) > 1: - bfapp = bfabric.BfabricFeeder() - - workunitid = int(sys.argv[1]) - print("workunitit={}".format(workunitid)) - - res = bfapp.save_object(endpoint='workunit', obj={'id': workunitid, 'status': 'failed'}) - bfapp.print_json(res) - print ("alive") diff --git a/bfabric/scripts/bfabric_setWorkunitStatus_processing.py b/bfabric/scripts/bfabric_setWorkunitStatus_processing.py deleted file mode 100755 index 57f4948b..00000000 --- a/bfabric/scripts/bfabric_setWorkunitStatus_processing.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/python -# -*- coding: latin1 -*- - -""" -set status of a resource of a given resource id -""" - -# Copyright (C) 2023 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. -# -# Authors: -# Christian Panse -# -# Licensed under GPL version 3 -# -# https://github.com/fgcz/bfabricPy/ - -import sys -import bfabric - -from random import randint -from time import sleep - -if __name__ == "__main__": - if len(sys.argv) > 1: - B = bfabric.BfabricFeeder() - res = B.save_object(endpoint='workunit', obj={'id': int(sys.argv[1]), 'status': 'processing'}) - B.print_json(res) diff --git a/bfabric/scripts/bfabric_upload_resource.py b/bfabric/scripts/bfabric_upload_resource.py index fe8c6192..5302320e 100755 --- a/bfabric/scripts/bfabric_upload_resource.py +++ b/bfabric/scripts/bfabric_upload_resource.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Copyright (C) 2017,2020 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. 
@@ -12,15 +10,28 @@ this script takes a blob file and a workunit id as input and adds the file as resource to bfabric """ +import argparse +import json +from pathlib import Path -import sys -import os from bfabric import Bfabric + +def bfabric_upload_resource(client: Bfabric, filename: Path, workunit_id: int) -> None: + """Uploads the specified file to the workunit with the name of the file as resource name.""" + result = client.upload_resource(resource_name=filename.name, content=filename.read_bytes(), workunit_id=workunit_id) + print(json.dumps(result.to_list_dict(), indent=2)) + + +def main() -> None: + """Parses the command line arguments and calls `bfabric_upload_resource`.""" + client = Bfabric.from_config(verbose=True) + parser = argparse.ArgumentParser() + parser.add_argument("filename", help="filename", type=Path) + parser.add_argument("workunitid", help="workunitid", type=int) + args = parser.parse_args() + bfabric_upload_resource(client=client, filename=args.filename, workunit_id=args.workunitid) + + if __name__ == "__main__": - if len(sys.argv) == 3 and os.path.isfile(sys.argv[1]): - B = Bfabric() - B.print_json(B.upload_file(filename = sys.argv[1], workunitid = int(sys.argv[2]))) - else: - print("usage:\nbfabric_upload_resource.py ") - sys.exit(1) + main() diff --git a/bfabric/scripts/bfabric_upload_submitter_executable.py b/bfabric/scripts/bfabric_upload_submitter_executable.py index f44d9117..23d41af3 100755 --- a/bfabric/scripts/bfabric_upload_submitter_executable.py +++ b/bfabric/scripts/bfabric_upload_submitter_executable.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- - """ Uploader for B-Fabric """ @@ -30,7 +28,7 @@ # # # Example of use: -# +# # For bfabric.__version__ < 0.10.22 # # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py gridengine --name "Dummy - yaml / Grid Engine executable" --description "Dummy submitter for the bfabric functional test using Grid Engine." 
@@ -45,100 +43,101 @@ # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py slurm --name "Dummy_-_yaml___Slurm_executable" --description "test new submitter's parameters" # -import os -import sys +import argparse import base64 + +import yaml + from bfabric import Bfabric -import argparse -SVN="$HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/bfabric_upload_submitter_executable.py $" - -def setup(argv=sys.argv[1:]): - argparser = argparse.ArgumentParser(description="Arguments for new submitter executable.\nFor more details run: ./bfabric_upload_submitter_executable.py --help") - argparser.add_argument('filename', type=str, help="Bash executable of the submitter") - argparser.add_argument('engine', type=str, choices=['slurm', 'gridengine'], help="Valid engines for job handling are: slurm, gridengine") - argparser.add_argument('--name', type=str, help="Name of the submitter", required=False) - argparser.add_argument('--description', type=str, help="Description about the submitter", required=False) - if len(sys.argv) < 3: - argparser.print_help(sys.stderr) - sys.exit(1) - options = argparser.parse_args() - return options - -def main(options): + +def main_upload_submitter_executable(options) -> None: executableFileName = options.filename engine = options.engine - bfapp = Bfabric() + client = Bfabric.from_config(verbose=True) - with open(executableFileName, 'r') as f: + with open(executableFileName) as f: executable = f.read() - attr = { 'context': 'SUBMITTER', - 'parameter': [{'modifiable': 'true', - 'required': 'true', - 'type':'STRING'}, - {'modifiable': 'true', - 'required': 'true', - 'type':'STRING'}, - {'modifiable': 'true', - 'required': 'true', - 'type':'STRING'}], - 'masterexecutableid': 11871, - 'status': 'available', - 'enabled': 'true', - 'valid': 'true', - 'base64': base64.b64encode(executable.encode()).decode() } + attr = { + "context": "SUBMITTER", + "parameter": [ + {"modifiable": "true", "required": "true", "type": "STRING"}, + {"modifiable": "true", "required": "true", "type": "STRING"}, + {"modifiable": "true", "required": "true", "type": "STRING"}, + ], + "masterexecutableid": 11871, + "status": "available", + "enabled": "true", + "valid": "true", + "base64": base64.b64encode(executable.encode()).decode(), + } if engine == "slurm": - attr['name'] = 'yaml / Slurm executable' - attr['parameter'][0]['description'] = 'Which Slurm partition should be used.' - attr['parameter'][0]['enumeration'] = ['prx','maxquant','scaffold','mascot'] - attr['parameter'][0]['key'] = 'partition' - attr['parameter'][0]['label'] = 'partition' - attr['parameter'][0]['value'] = 'prx' - attr['parameter'][1]['description'] = 'Which Slurm nodelist should be used.' - attr['parameter'][1]['enumeration'] = ['fgcz-r-[035,028]','fgcz-r-035','fgcz-r-033','fgcz-r-028','fgcz-r-018'] - attr['parameter'][1]['key'] = 'nodelist' - attr['parameter'][1]['label'] = 'nodelist' - attr['parameter'][1]['value'] = 'fgcz-r-[035,028]' - attr['parameter'][2]['description'] = 'Which Slurm memory should be used.' - attr['parameter'][2]['enumeration'] = ['10G','50G','128G','256G','512G','960G'] - attr['parameter'][2]['key'] = 'memory' - attr['parameter'][2]['label'] = 'memory' - attr['parameter'][2]['value'] = '10G' - attr['version'] = 1.02 - attr['description'] = 'Stage the yaml config file to application using Slurm.' + attr["name"] = "yaml / Slurm executable" + attr["parameter"][0]["description"] = "Which Slurm partition should be used." 
+ attr["parameter"][0]["enumeration"] = ["prx", "maxquant", "scaffold", "mascot"] + attr["parameter"][0]["key"] = "partition" + attr["parameter"][0]["label"] = "partition" + attr["parameter"][0]["value"] = "prx" + attr["parameter"][1]["description"] = "Which Slurm nodelist should be used." + attr["parameter"][1]["enumeration"] = [ + "fgcz-r-[035,028]", + "fgcz-r-035", + "fgcz-r-033", + "fgcz-r-028", + "fgcz-r-018", + ] + attr["parameter"][1]["key"] = "nodelist" + attr["parameter"][1]["label"] = "nodelist" + attr["parameter"][1]["value"] = "fgcz-r-[035,028]" + attr["parameter"][2]["description"] = "Which Slurm memory should be used." + attr["parameter"][2]["enumeration"] = ["10G", "50G", "128G", "256G", "512G", "960G"] + attr["parameter"][2]["key"] = "memory" + attr["parameter"][2]["label"] = "memory" + attr["parameter"][2]["value"] = "10G" + attr["version"] = 1.02 + attr["description"] = "Stage the yaml config file to application using Slurm." elif engine == "gridengine": - attr['name'] = 'yaml / Grid Engine executable' - attr['parameter'][0]['description'] = 'Which Grid Engine partition should be used.' - attr['parameter'][0]['enumeration'] = 'PRX' - attr['parameter'][0]['key'] = 'partition' - attr['parameter'][0]['label'] = 'partition' - attr['parameter'][0]['value'] = 'PRX' - attr['parameter'][1]['description'] = 'Which Grid Engine node should be used.' - attr['parameter'][1]['enumeration'] = ['fgcz-r-033','fgcz-r-028','fgcz-r-018'] - attr['parameter'][1]['key'] = 'nodelist' - attr['parameter'][1]['label'] = 'nodelist' - attr['parameter'][1]['value'] = 'fgcz-r-028' - attr['version'] = 1.00 - attr['description'] = 'Stage the yaml config file to an application using Grid Engine.' + attr["name"] = "yaml / Grid Engine executable" + attr["parameter"][0]["description"] = "Which Grid Engine partition should be used." + attr["parameter"][0]["enumeration"] = "PRX" + attr["parameter"][0]["key"] = "partition" + attr["parameter"][0]["label"] = "partition" + attr["parameter"][0]["value"] = "PRX" + attr["parameter"][1]["description"] = "Which Grid Engine node should be used." + attr["parameter"][1]["enumeration"] = ["fgcz-r-033", "fgcz-r-028", "fgcz-r-018"] + attr["parameter"][1]["key"] = "nodelist" + attr["parameter"][1]["label"] = "nodelist" + attr["parameter"][1]["value"] = "fgcz-r-028" + attr["version"] = 1.00 + attr["description"] = "Stage the yaml config file to an application using Grid Engine." 
     if options.name:
-        attr['name'] = options.name
-    else:
-        pass
+        attr["name"] = options.name
     if options.description:
-        attr['description'] = options.description
-    else:
-        pass
-
-    res = bfapp.save_object('executable', attr)
-
-    bfapp.print_yaml(res)
+        attr["description"] = options.description
+
+    res = client.save("executable", attr)
+    print(yaml.dump(res.to_list_dict()))
+
+
+def main() -> None:
+    """Parses command line arguments and calls `main_upload_submitter_executable`."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("filename", type=str, help="Bash executable of the submitter")
+    parser.add_argument(
+        "engine",
+        type=str,
+        choices=["slurm", "gridengine"],
+        help="Valid engines for job handling are: slurm, gridengine",
+    )
+    parser.add_argument("--name", type=str, help="Name of the submitter", required=False)
+    parser.add_argument("--description", type=str, help="Description about the submitter", required=False)
+    options = parser.parse_args()
+    main_upload_submitter_executable(options)
 
 if __name__ == "__main__":
-    options = setup()
-    main(options)
-
+    main()
diff --git a/bfabric/scripts/bfabric_wrapper_creator_yaml.py b/bfabric/scripts/bfabric_wrapper_creator_yaml.py
index 462d68c3..8e6fc953 100755
--- a/bfabric/scripts/bfabric_wrapper_creator_yaml.py
+++ b/bfabric/scripts/bfabric_wrapper_creator_yaml.py
@@ -1,5 +1,4 @@
-#!/usr/bin/python
-# -*- coding: latin1 -*-
+#!/usr/bin/env python3
 
 """
 A wrapper_creator for B-Fabric
@@ -20,9 +19,8 @@
 # Licensed under GPL version 3
 #
 # $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $
-# $Id: wrapper_creator_yaml.py 2478 2016-09-26 09:46:53Z cpanse $
+# $Id: wrapper_creator_yaml.py 2478 2016-09-26 09:46:53Z cpanse $
 
-import os
 import sys
 from bfabric import BfabricWrapperCreator
 
@@ -30,10 +28,10 @@
 
     externaljobid = -1
 
-    if len(sys.argv) == 3 and sys.argv[1] == '-j' and int(sys.argv[2]) > 0:
+    if len(sys.argv) == 3 and sys.argv[1] == "-j" and int(sys.argv[2]) > 0:
         externaljobid = int(sys.argv[2])
     else:
-        print ("usage: {} -j ".format(sys.argv[0]))
+        print(f"usage: {sys.argv[0]} -j <externaljobid>")
         sys.exit(1)
 
     bfapp = BfabricWrapperCreator(externaljobid=externaljobid)
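An aside on the lxml calls touched in the two MaxQuant wrapper diffs below: append() inserts a single element, while extend() expects an iterable of elements, and iterating an Element yields its children. A standalone illustration (not repository code):

    from lxml import etree

    parent = etree.Element("parent")
    child = etree.Element("child")
    parent.append(child)  # adds the element itself
    assert len(parent) == 1

    other = etree.XML("<a><b/><c/></a>")
    parent.extend(other)  # extend() iterates its argument: this moves <b/> and <c/> into parent, not <a/>
    assert len(parent) == 3

This is why the element insertions in the wrappers below stay with append().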
diff --git a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py
index ff2d89ac..2a50994d 100755
--- a/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py
+++ b/bfabric/scripts/fgcz_maxquant_scaffold-wrapper.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 #
 # Copyright (C) 2019 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
 #
@@ -17,10 +16,12 @@
 import sys
 from lxml import etree
 import yaml
-from io import StringIO, BytesIO
+from io import StringIO
 from optparse import OptionParser
 
+
 # import unittest
 
+
 class FgczMaxQuantScaffold:
     """
     input:
@@ -32,33 +33,32 @@ class FgczMaxQuantScaffold:
     fasta = None
     samples = None
 
-    def __init__(self, yamlfilename=None, zipfilename=None):
+    def __init__(self, yamlfilename=None, zipfilename=None) -> None:
         if not os.path.isfile(zipfilename):
-            print("ERROR: no such file '{0}'".format(zipfilename))
+            print(f"ERROR: no such file '{zipfilename}'")
             sys.exit(1)
-
         self.zipfilename = zipfilename
 
-        with open(yamlfilename, 'r') as f:
+        with open(yamlfilename) as f:
             content = f.read()
         self.config = yaml.load(content, Loader=yaml.FullLoader)
 
         try:
-            self.fasta = os.path.basename(self.config['application']['parameters']['/fastaFiles/FastaFileInfo/fastaFilePath'])
+            self.fasta = os.path.basename(
+                self.config["application"]["parameters"]["/fastaFiles/FastaFileInfo/fastaFilePath"]
+            )
         except:
             raise
 
-        L = [value for values in self.config['application']['input'].values() for value in values]
-
-        self.samples = list(map(lambda x: os.path.basename(x).replace('.raw', ''), L))
+        L = [value for values in self.config["application"]["input"].values() for value in values]
+        self.samples = list(map(lambda x: os.path.basename(x).replace(".raw", ""), L))
 
-    def getBiologicalSample(selfs, InputFile = None, category = '***BASENAME***'):
-
-        scaffold_BiologicalSample = '''
+    def getBiologicalSample(selfs, InputFile=None, category="***BASENAME***"):
+        scaffold_BiologicalSample = """
             WU192418/output-WU192418.zip
-        '''
+        """
 
         pBioSample = etree.XML(scaffold_BiologicalSample)
@@ -78,18 +78,17 @@ def getBiologicalSample(selfs, InputFile = None, category = '***BASENAME***'):
         if eInputFile is None:
             raise TypeError
 
-        eInputFile.text = '{}'.format(InputFile)
-        eInputFile.attrib['maxQuantExperiment'] = "{}".format(category)
+        eInputFile.text = f"{InputFile}"
+        eInputFile.attrib["maxQuantExperiment"] = f"{category}"
 
         eBiologicalSample = eInputFile.getparent()
-        eBiologicalSample.attrib['category'] = "{}".format(category)
-        eBiologicalSample.attrib['name'] = "{}".format(category)
-
-        return(pBioSample)
+        eBiologicalSample.attrib["category"] = f"{category}"
+        eBiologicalSample.attrib["name"] = f"{category}"
+        return pBioSample
 
     def getScaffold(selfs):
-        xml = '''
+        xml = """
 
-'''
+"""
         pxml = etree.parse(StringIO(xml))
-        #pxml = etree.XML(xml)
-        return(pxml)
-
-
+        # pxml = etree.XML(xml)
+        return pxml
 
-    def run(self):
+    def run(self) -> None:
         xml = self.getScaffold()
-        eExperiment = xml.find('/Experiment')
-        eFastaDatabase = xml.find('/Experiment/FastaDatabase')
-        eFastaDatabase.attrib['path'] = "{}/{}".format(os.getcwd(), self.fasta)
+        eExperiment = xml.find("/Experiment")
+        eFastaDatabase = xml.find("/Experiment/FastaDatabase")
+        eFastaDatabase.attrib["path"] = f"{os.getcwd()}/{self.fasta}"
 
         for s in self.samples:
-            eExperiment.append(self.getBiologicalSample(category=s, InputFile = self.zipfilename))
+            eExperiment.append(self.getBiologicalSample(category=s, InputFile=self.zipfilename))
+        xml.write("/dev/stdout", pretty_print=True, xml_declaration=True, method="xml", encoding="UTF-8")
 
-        xml.write('/dev/stdout' , pretty_print=True, xml_declaration=True, method='xml', encoding="UTF-8")
 
 if __name__ == "__main__":
-    parser = OptionParser(usage="usage: %prog -y -z ",
-                          version="%prog 1.0")
-
-    parser.add_option("-y", "--yaml",
-                      type='string',
-                      action="store",
-                      dest="yaml_filename",
-                      default="/Users/cp/WU199270.yaml ",
-                      help="config file.yaml")
-
-    parser.add_option("-z", "--zip",
-                      type='string',
-                      action="store",
-                      dest="zip_filename",
-                      default="output-WU199270.zip",
-                      help="config file.yaml")
+    parser = OptionParser(
+        usage="usage: %prog -y <file.yaml> -z <file.zip>",
+        version="%prog 1.0",
+    )
+
+    parser.add_option(
+        "-y",
+        "--yaml",
+        type="string",
+        action="store",
+        dest="yaml_filename",
+        default="/Users/cp/WU199270.yaml",
+        help="config file.yaml",
+    )
+
+    parser.add_option(
+        "-z",
+        "--zip",
+        type="string",
+        action="store",
+        dest="zip_filename",
+        default="output-WU199270.zip",
+        help="output zip file",
+    )
 
     (options, args) = parser.parse_args()
     driver = FgczMaxQuantScaffold(yamlfilename=options.yaml_filename, zipfilename=options.zip_filename)
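The heart of generate_mqpar in the next diff is a query-to-value substitution over the mqpar XML template; a toy sketch of the mechanism (template and key are placeholders, not the real mqpar.xml):

    from io import StringIO
    from lxml import etree

    toy_template = etree.parse(StringIO("<MaxQuantParams><numThreads>1</numThreads></MaxQuantParams>"))
    parameters = {"/numThreads": "8"}  # shaped like config["application"]["parameters"]
    for query, value in parameters.items():
        element = toy_template.find(query)
        if element is not None:
            element.text = value  # overwrite the template default
    print(etree.tostring(toy_template).decode())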
help="config file.yaml") - - parser.add_option("-z", "--zip", - type='string', - action="store", - dest="zip_filename", - default="output-WU199270.zip", - help="config file.yaml") + parser = OptionParser( + usage="usage: %prog -y -z ", + version="%prog 1.0", + ) + + parser.add_option( + "-y", + "--yaml", + type="string", + action="store", + dest="yaml_filename", + default="/Users/cp/WU199270.yaml ", + help="config file.yaml", + ) + + parser.add_option( + "-z", + "--zip", + type="string", + action="store", + dest="zip_filename", + default="output-WU199270.zip", + help="config file.yaml", + ) (options, args) = parser.parse_args() driver = FgczMaxQuantScaffold(yamlfilename=options.yaml_filename, zipfilename=options.zip_filename) diff --git a/bfabric/scripts/fgcz_maxquant_wrapper.py b/bfabric/scripts/fgcz_maxquant_wrapper.py index 2f9109d4..a0a44774 100755 --- a/bfabric/scripts/fgcz_maxquant_wrapper.py +++ b/bfabric/scripts/fgcz_maxquant_wrapper.py @@ -1,6 +1,4 @@ -#!/usr/bin/python3 -# -*- coding: latin1 -*- - +#!/usr/bin/env python3 # Copyright (C) 2017, 2018 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. # # Authors: @@ -8,23 +6,16 @@ # # Licensed under GPL version 3 # -# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/scripts/fgcz_pd_wrapper.py $ -# $Id: fgcz_pd_wrapper.py 2992 2017-08-17 13:37:36Z cpanse $ -import logging -import logging.handlers import os -import pprint -import re import sys -import time -import urllib +from io import StringIO from optparse import OptionParser -from lxml import etree -import yaml from pathlib import Path -import hashlib -from io import StringIO, BytesIO + +import yaml +from lxml import etree + # import warnings """ @@ -33,75 +24,73 @@ """ -import unittest class FgczMaxQuantConfig: """ - input: - QEXACTIVE_2: - - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_01_Fetuin40fmol.raw - - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_02_YPG1.raw - output: + input: + QEXACTIVE_2: + - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_01_Fetuin40fmol.raw + - bfabric@fgczdata.fgcz-net.unizh.ch://srv/www/htdocs//p1946/Proteomics/QEXACTIVE_2/paolo_20150811_course/20150811_02_YPG1.raw + output: """ config = None scratchdir = None - def __init__(self, config=None, scratch = "/scratch/MAXQUANT/"): + def __init__(self, config=None, scratch="/scratch/MAXQUANT/") -> None: if config: self.config = config - self.scratchdir = Path("{0}/WU{1}".format(scratch, self.config['job_configuration']['workunit_id'])) + self.scratchdir = Path("{}/WU{}".format(scratch, self.config["job_configuration"]["workunit_id"])) if not os.path.isdir(self.scratchdir): - print ("no scratch dir '{0}'.".format(self.scratchdir)) + print(f"no scratch dir '{self.scratchdir}'.") # raise SystemError - def generate_mqpar(self, xml_filename, xml_template): - """ PARAMETER """ - for query, value in self.config['application']['parameters'].items(): + def generate_mqpar(self, xml_filename, xml_template) -> None: + """PARAMETER""" + for query, value in self.config["application"]["parameters"].items(): element = xml_template.find(query) if element is not None: if value == "None": - element.text = '' + element.text = "" elif query == "/parameterGroups/parameterGroup/variableModifications": for a in value.split(","): - estring = etree.Element("string") - estring.text 
                elif query == "/parameterGroups/parameterGroup/variableModifications":
                    for a in value.split(","):
-                      estring = etree.Element("string")
-                      estring.text = a
-                      element.append(estring)
+                        estring = etree.Element("string")
+                        estring.text = a
+                        element.append(estring)
                     pass
                 else:
-                    print ("replacing xpath expression {} by {}.".format(query, value))
+                    print(f"replacing xpath expression {query} by {value}.")
                     element.text = value
 
-        ecount = 0;
+        ecount = 0
         """ INPUT """
-        for query, value in self.config['application']['input'].items():
-            for input in self.config['application']['input'][query]:
+        for query, value in self.config["application"]["input"].items():
+            for input in self.config["application"]["input"][query]:
                 element = xml_template.find("/filePaths")
                 if element is None:
                     raise TypeError
-
                 host, file = input.split(":")
-                print ("{}\t{}".format(os.path.basename(input), file))
+                print(f"{os.path.basename(input)}\t{file}")
 
                 if not os.path.isfile(file):
-                    print("'{}' do not exists.".format(file))
-                    #raise SystemError
+                    print(f"'{file}' does not exist.")
+                    # raise SystemError
 
-                targetRawFile = "{}/{}".format(self.scratchdir, os.path.basename(input))
+                targetRawFile = f"{self.scratchdir}/{os.path.basename(input)}"
 
                 if not os.path.islink(targetRawFile):
                     try:
-                        os.symlink(file, targetRawFile)
+                        os.symlink(file, targetRawFile)
                     except:
-                        print("linking '{}' failed.".format(file))
+                        print(f"linking '{file}' failed.")
 
                 estring = etree.Element("string")
                 estring.text = targetRawFile
                 element.append(estring)
 
                 element = xml_template.find("/experiments")
                 if element is None:
@@ -110,7 +99,7 @@ def generate_mqpar(self, xml_filename, xml_template):
                 estring = etree.Element("string")
                 estring.text = "{}".format(os.path.basename(input).replace(".raw", "").replace(".RAW", ""))
                 ecount += 1
                 element.append(estring)
 
                 element = xml_template.find("/fractions")
                 if element is None:
@@ -118,7 +107,7 @@ def generate_mqpar(self, xml_filename, xml_template):
 
                 estring = etree.Element("short")
                 estring.text = "32767"
                 element.append(estring)
 
                 element = xml_template.find("/ptms")
                 if element is None:
@@ -126,7 +115,7 @@ def generate_mqpar(self, xml_filename, xml_template):
 
                 estring = etree.Element("boolean")
                 estring.text = "false"
                 element.append(estring)
 
                 element = xml_template.find("/paramGroupIndices")
                 if element is None:
@@ -134,17 +123,16 @@ def generate_mqpar(self, xml_filename, xml_template):
 
                 estring = etree.Element("int")
                 estring.text = "0"
-                element.append(estring)
-
-        #return(xml_template)
-        xml_template.write(xml_filename)#, pretty_print=True)
+                element.append(estring)
+
+        # return(xml_template)
+        xml_template.write(xml_filename)  # , pretty_print=True)
 
-    def run(self):
-        pass
+    def run(self) -> None:
+        pass
 
-mqpar_templ_xml ='''
+mqpar_templ_xml = """
 test.fasta
 
-'''
+"""
 
 if __name__ == "__main__":
-    parser = OptionParser(usage="usage: %prog -y ",
-                          version="%prog 1.0")
-
-    parser.add_option("-y", "--yaml",
-                      type='string',
-                      action="store",
-                      dest="yaml_filename",
-                      default=None,
-                      help="config file.yaml")
-
-    parser.add_option("-x", "--xml",
-                      type='string',
-                      action="store",
-                      dest="xml_filename",
-                      default=None,
-                      help="MaxQuant mqpar xml parameter filename.")
-
-    parser.add_option("-t", "--xmltemplate",
-                      type='string',
-                      action="store",
-                      dest="xml_template_filename",
-                      default=None,
-                      help="MaxQuant mqpar template xml parameter filename.")
+    parser = OptionParser(usage="usage: %prog -y <file.yaml>", version="%prog 1.0")
+
+    parser.add_option(
+        "-y", "--yaml", type="string", action="store", dest="yaml_filename", default=None, help="config 
file.yaml" + ) + + parser.add_option( + "-x", + "--xml", + type="string", + action="store", + dest="xml_filename", + default=None, + help="MaxQuant mqpar xml parameter filename.", + ) + + parser.add_option( + "-t", + "--xmltemplate", + type="string", + action="store", + dest="xml_template_filename", + default=None, + help="MaxQuant mqpar template xml parameter filename.", + ) (options, args) = parser.parse_args() if not os.path.isfile(options.yaml_filename): - print ("ERROR: no such file '{0}'".format(options.yaml_filename)) + print(f"ERROR: no such file '{options.yaml_filename}'") sys.exit(1) try: - with open(options.yaml_filename, 'r') as f: - content = f.read() - job_config = yaml.load(content) + with open(options.yaml_filename) as f: + job_config = yaml.safe_load(f) if options.xml_template_filename is None: try: @@ -527,25 +516,26 @@ def run(self): except: raise else: - with open(options.xml_template_filename, 'r') as f: + with open(options.xml_template_filename) as f: mqpartree = etree.parse(f) - MQC = FgczMaxQuantConfig(config = job_config, scratch="d:/scratch/") + MQC = FgczMaxQuantConfig(config=job_config, scratch="d:/scratch/") output = MQC.generate_mqpar(options.xml_filename, xml_template=mqpartree) except: - print ("ERROR: exit 1") + print("ERROR: exit 1") raise - import unittest """ python3 -m unittest fgcz_maxquant_wrapper.py """ + + class TestFgczMaxQuantConfig(unittest.TestCase): - def test_xml(self): + def test_xml(self) -> None: input_WU181492_yaml = """ application: input: @@ -631,7 +621,7 @@ def test_xml(self): """ - job_config = yaml.load(input_WU181492_yaml) + job_config = yaml.safe_load(input_WU181492_yaml) mqpartree = etree.parse(StringIO(mqpar_templ_xml)) MQC = FgczMaxQuantConfig(config=job_config) diff --git a/bfabric/tests/__init__.py b/bfabric/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/tests/__test_bfabric.py b/bfabric/tests/__test_bfabric.py deleted file mode 100755 index dc505716..00000000 --- a/bfabric/tests/__test_bfabric.py +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: latin1 -*- - -""" -unittest by -""" - -import unittest -from bfabric import Bfabric - -""" -ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py -""" -class BfabricTestCase(unittest.TestCase): - bfapp = Bfabric(verbose=True) - - workunits = [] - samples = [] - - - def workunit_save(self): - print("WORKUNIT SAVE") - for name in ['test1', 'test2', 'test3']: - res = self.bfapp.save_object(endpoint='workunit', obj={'name': "unit test - {}".format(name), - 'containerid': 3000, - 'description': '68b329da9893e34099c7d8ad5cb9c940', - 'applicationid': 217 - }) - - self.workunits.append(res[0]._id) - print(res) - print(self.workunits) - - def workunit_read(self): - print("WORKUNIT READ") - res = [self.bfapp.delete_object(endpoint='workunit', id=x)[0] for x in self.workunits] - print(res) - self.assertEqual(len(res), len(self.workunits)) - - def workunit_delete(self): - print("WORKUNIT DELETE") - res = [self.bfapp.delete_object(endpoint='workunit', id=x)[0] for x in self.workunits] - print(res) - self.assertEqual(len(res), len(self.workunits)) - - def sample_save(self): - print("SAVE SAMPLE") - sample_type = 'Biological Sample - Proteomics' - species = "n/a" - for name in ['test1', 'test2', 'test3']: - res = self.bfapp.save_object(endpoint='sample', obj={'name': "unit test - {} - {}".format(name, sample_type), - 'containerid': 3000, - 'type' : sample_type, - 'species' : species, - 
'samplingdate' : "2017-10-12", - 'groupingvar' : "A", - 'description': '68b329da9893e34099c7d8ad5cb9c940' - }) - - #print(res[0]._id) - print("=== BEGIN DEBUG") - for i in res: - print (i) - self.samples.append(res[0]._id) - print("=== END DEBUG") - - def sample_delete(self): - print("SAMPLE DELETE") - print(self.samples) - res = [self.bfapp.delete_object(endpoint='sample', id=x)[0] for x in self.samples] - #res = [x for x in res if "removed successfully." in x.deletionreport] - print(res) - self.assertEqual(len(res), len(self.samples)) - - def test_workunit(self): - self.workunit_save() - self.workunit_read() - self.workunit_delete() - - def test_sample(self): - self.sample_save() - self.sample_delete() - -if __name__ == '__main__': - unittest.main() diff --git a/bfabric/tests/integration/__init__.py b/bfabric/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/tests/integration/groundtruth.json b/bfabric/tests/integration/groundtruth.json new file mode 100644 index 00000000..fc7ffbe4 --- /dev/null +++ b/bfabric/tests/integration/groundtruth.json @@ -0,0 +1,144 @@ + { + "project":[ + [ + { + "id":3000 + }, + { + "name":"FGCZ Internal" + } + ] + ], + "container":[ + [ + { + "id":3000 + }, + { + "name":"FGCZ Internal" + } + ] + ], + "application":[ + [ + { + "id":224 + }, + { + "name":"MaxQuant" + } + ] + ], + "workunit":[ + [ + { + "createdby":"gerritsb", + "name":"Lovorka SUZ12 control" + }, + { + "id":162, + "status":"AVAILABLE" + } + ] + ], + "sample":[ + [ + { + "id":190249 + }, + { + "name":"autoQC4L", + "type":"Biological Sample - Proteomics User Lab" + } + ] + ], + "annotation":[ + [ + { + "id":2710 + }, + { + "name":"A", + "type":"Grouping Var" + } + ] + ], + "resource":[ + [ + { + "filechecksum":"090a3f025d3ebbad75213e3d4886e17c" + }, + { + "name":"20190903_07_autoQC4L.raw", + "size":264773059 + } + ], + [ + { + "filechecksum":"090a3f02%" + }, + { + "name":"20190903_07_autoQC4L.raw", + "size":264773059, + "filechecksum":"090a3f025d3ebbad75213e3d4886e17c" + } + ] + ], + "user":[ + [ + { + "id":482 + }, + { + "login":"cpanse", + "city":"Zürich", + "zip":"8057" + } + ], + [ + { + "login":"cpanse" + }, + { + "id":482, + "city":"Zürich", + "zip":"8057" + } + ], + [ + { + "login":"mderrico" + }, + { + "id":7133, + "lastname":"d'Errico", + "city":"Zürich", + "zip":"8057" + } + ] + ], + "executable":[ + [ + { + "id":16375 + }, + { + "createdby":"cpanse", + "name":"yaml / Grid Engine executable", + "size":1593, + "context":"SUBMITTER" + } + ], + [ + { + "id":16374 + }, + { + "createdby":"cpanse", + "name":"yaml 004", + "size":953, + "context":"WRAPPERCREATOR" + } + ] + ] + } diff --git a/bfabric/tests/integration/integration_test_helper.py b/bfabric/tests/integration/integration_test_helper.py new file mode 100644 index 00000000..85e20da2 --- /dev/null +++ b/bfabric/tests/integration/integration_test_helper.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from typing import Any, TYPE_CHECKING + + +if TYPE_CHECKING: + from bfabric import Bfabric + + +class DeleteEntities: + """Deletes entities that were registered, when a test is torn down. + Please use `self.addCleanup` to ensure that the entities are deleted even if the test fails. 
+ """ + + def __init__(self, client: Bfabric, created_entities: list[tuple[str, int]] | None = None) -> None: + self.client = client + self.created_entities = created_entities or [] + + def __call__(self) -> None: + """Deletes all created entities.""" + errors = [] + for entity_type, entity_id in self.created_entities: + errors += self.client.delete(entity_type, entity_id, check=False).errors + if errors: + print("Error deleting entities:", errors) + else: + print("Successfully deleted entities:", self.created_entities) + + def register_entity(self, entity: dict[str, Any], entity_type: str | None = None) -> None: + """Registers an entity to be deleted when the test is torn down.""" + if entity_type is None: + entity_type = entity["classname"] + self.created_entities.append((entity_type, entity["id"])) diff --git a/bfabric/tests/integration/scripts/__init__.py b/bfabric/tests/integration/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/tests/integration/scripts/test_read.py b/bfabric/tests/integration/scripts/test_read.py new file mode 100644 index 00000000..f1636e47 --- /dev/null +++ b/bfabric/tests/integration/scripts/test_read.py @@ -0,0 +1,41 @@ +import contextlib +import json +import unittest +from io import StringIO + +import yaml + +from bfabric import Bfabric +from bfabric.scripts.bfabric_read import bfabric_read +from bfabric.tests.integration.integration_test_helper import DeleteEntities + + +class TestRead(unittest.TestCase): + def setUp(self): + self.client = Bfabric.from_config(config_env="TEST") + self.delete_entities = DeleteEntities(client=self.client, created_entities=[]) + self.addCleanup(self.delete_entities) + + self.example = {"endpoint": "resource"} + + def test_read_json(self): + out = StringIO() + with contextlib.redirect_stdout(out): + bfabric_read( + client=self.client, endpoint=self.example["endpoint"], attribute=None, value=None, output_format="json" + ) + parsed = json.loads(out.getvalue()) + self.assertEqual(100, len(parsed)) + + def test_read_yaml(self): + out = StringIO() + with contextlib.redirect_stdout(out): + bfabric_read( + client=self.client, endpoint=self.example["endpoint"], attribute=None, value=None, output_format="yaml" + ) + parsed = yaml.safe_load(out.getvalue()) + self.assertEqual(100, len(parsed)) + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/integration/scripts/test_save_csv2dataset.py b/bfabric/tests/integration/scripts/test_save_csv2dataset.py new file mode 100644 index 00000000..89a17043 --- /dev/null +++ b/bfabric/tests/integration/scripts/test_save_csv2dataset.py @@ -0,0 +1,119 @@ +from __future__ import annotations +import time +import unittest +import datetime +from pathlib import Path +from tempfile import TemporaryDirectory + +import polars as pl +from bfabric import Bfabric +from bfabric.scripts.bfabric_save_csv2dataset import bfabric_save_csv2dataset +from bfabric.tests.integration.integration_test_helper import DeleteEntities + + +class TestSaveCsv2Dataset(unittest.TestCase): + def setUp(self): + self.mock_client = Bfabric.from_config(config_env="TEST", verbose=True) + self.created_entities = [] + self.addCleanup(DeleteEntities(self.mock_client, self.created_entities)) + + self.sample_data = pl.DataFrame( + [ + { + "Normal": "just a normal string", + "Comma": "contains,some,commas,,,", + "Backslash": "testing\\backslash/support", + "Apostrophe": 'Lot\'s"of"apostrophes', + } + ] + ) + + def test_save_csv2dataset(self): + timestamp = 
datetime.datetime.now().strftime("%Y%m%d%H%M%S") + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + sample_file = work_dir / "sample_table.csv" + self.sample_data.write_csv(sample_file) + + bfabric_save_csv2dataset( + self.mock_client, + csv_file=sample_file, + dataset_name=f"test_dataset {timestamp}", + container_id=3000, + workunit_id=None, + sep=",", + has_header=True, + ) + + # check the result + time.sleep(1) + response = self.mock_client.read("dataset", {"name": f"test_dataset {timestamp}"}).to_list_dict()[0] + self.created_entities.append(("dataset", response["id"])) + + expected_attribute = [ + {"name": "Normal", "position": "1", "type": "String"}, + {"name": "Comma", "position": "2", "type": "String"}, + {"name": "Backslash", "position": "3", "type": "String"}, + {"name": "Apostrophe", "position": "4", "type": "String"}, + ] + self.assertListEqual(expected_attribute, response["attribute"]) + + expected_item = [ + { + "field": [ + {"attributeposition": "1", "value": "just a normal string"}, + {"attributeposition": "2", "value": "contains,some,commas,,,"}, + {"attributeposition": "3", "value": "testing\\backslash/support"}, + {"attributeposition": "4", "value": 'Lot\'s"of"apostrophes'}, + ], + "position": "1", + } + ] + self.assertListEqual(expected_item, response["item"]) + + def test_save_csv2dataset_no_header(self): + timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S") + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + sample_file = work_dir / "sample_table.csv" + self.sample_data.write_csv(sample_file, include_header=False) + + bfabric_save_csv2dataset( + self.mock_client, + csv_file=sample_file, + dataset_name=f"test_dataset {timestamp}", + container_id=3000, + workunit_id=None, + sep=",", + has_header=False, + ) + + # check the result + time.sleep(1) + response = self.mock_client.read("dataset", {"name": f"test_dataset {timestamp}"}).to_list_dict()[0] + self.created_entities.append(("dataset", response["id"])) + + expected_attribute = [ + {"name": "Column_1", "position": "1", "type": "String"}, + {"name": "Column_2", "position": "2", "type": "String"}, + {"name": "Column_3", "position": "3", "type": "String"}, + {"name": "Column_4", "position": "4", "type": "String"}, + ] + self.assertListEqual(expected_attribute, response["attribute"]) + + expected_item = [ + { + "field": [ + {"attributeposition": "1", "value": "just a normal string"}, + {"attributeposition": "2", "value": "contains,some,commas,,,"}, + {"attributeposition": "3", "value": "testing\\backslash/support"}, + {"attributeposition": "4", "value": 'Lot\'s"of"apostrophes'}, + ], + "position": "1", + } + ] + self.assertListEqual(expected_item, response["item"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/integration/scripts/test_save_dataset2csv.py b/bfabric/tests/integration/scripts/test_save_dataset2csv.py new file mode 100644 index 00000000..639a7268 --- /dev/null +++ b/bfabric/tests/integration/scripts/test_save_dataset2csv.py @@ -0,0 +1,50 @@ +import unittest +from pathlib import Path +from tempfile import TemporaryDirectory + +import polars as pl +import polars.testing + +from bfabric import Bfabric +from bfabric.scripts.bfabric_save_dataset2csv import bfabric_save_dataset2csv + + +class TestSaveDataset2Csv(unittest.TestCase): + def setUp(self): + self.mock_client = Bfabric.from_config(config_env="TEST", verbose=True) + self.dataset_id = 46184 + + def test_save_dataset2csv(self): + with TemporaryDirectory() as temp_dir: + out_dir = 
Path(temp_dir) + bfabric_save_dataset2csv( + self.mock_client, self.dataset_id, out_dir, out_filename=Path("dataset.csv"), sep="," + ) + + expected_lines = [ + r"Normal,Comma,Backslash,Apostrophe", + r"""just a normal string,"contains,some,commas,,,",testing\backslash/support,"Lot's""of""apostrophes""" + '"', + ] + + out_file = out_dir / "dataset.csv" + actual_lines = out_file.read_text().splitlines() + + self.assertListEqual(expected_lines, actual_lines) + + df = pl.read_csv(out_file) + expected_df = pl.DataFrame( + [ + { + "Normal": "just a normal string", + "Comma": "contains,some,commas,,,", + "Backslash": "testing\\backslash/support", + "Apostrophe": 'Lot\'s"of"apostrophes', + } + ] + ) + pl.testing.assert_frame_equal(expected_df, df) + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/integration/scripts/test_upload_resource.py b/bfabric/tests/integration/scripts/test_upload_resource.py new file mode 100644 index 00000000..8be63682 --- /dev/null +++ b/bfabric/tests/integration/scripts/test_upload_resource.py @@ -0,0 +1,78 @@ +import contextlib +import datetime +import hashlib +import json +import unittest +from io import StringIO +from pathlib import Path +from tempfile import TemporaryDirectory + +from bfabric import Bfabric +from bfabric.scripts.bfabric_upload_resource import bfabric_upload_resource +from bfabric.tests.integration.integration_test_helper import DeleteEntities + + +class TestUploadResource(unittest.TestCase): + def setUp(self): + self.client = Bfabric.from_config(config_env="TEST", verbose=True) + self.delete_results = DeleteEntities(client=self.client, created_entities=[]) + self.addCleanup(self.delete_results) + self.container_id = 3000 + + self.ts = datetime.datetime.now().isoformat() + + def _create_workunit(self): + # create workunit + workunit = self.client.save( + "workunit", {"containerid": self.container_id, "name": f"Testing {self.ts}", "applicationid": 1} + ).to_list_dict()[0] + self.delete_results.created_entities.append(("workunit", workunit["id"])) + return workunit["id"] + + def test_upload_resource(self): + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + file = work_dir / "test.txt" + file.write_text("Hello World!") + + workunit_id = self._create_workunit() + + # upload resource + out_text = StringIO() + with contextlib.redirect_stdout(out_text): + bfabric_upload_resource(client=self.client, filename=file, workunit_id=workunit_id) + resp = json.loads(out_text.getvalue())[0] + + # expected checksum + expected_checksum = hashlib.md5(file.read_bytes()).hexdigest() + + # check resource + resource = self.client.read("resource", {"id": resp["id"]}).to_list_dict()[0] + self.assertEqual(file.name, resource["name"]) + self.assertEqual("base64 encoded file", resource["description"]) + self.assertEqual(expected_checksum, resource["filechecksum"]) + + def test_upload_resource_when_already_exists(self): + with TemporaryDirectory() as work_dir: + work_dir = Path(work_dir) + file = work_dir / "test.txt" + file.write_text("Hello World!") + + workunit_id = self._create_workunit() + + # upload resource + out_text = StringIO() + with contextlib.redirect_stdout(out_text): + bfabric_upload_resource(client=self.client, filename=file, workunit_id=workunit_id) + resp = json.loads(out_text.getvalue())[0] + self.assertEqual(workunit_id, resp["workunit"]["id"]) + + # upload resource again + with self.assertRaises(RuntimeError) as error: + bfabric_upload_resource(client=self.client, filename=file, workunit_id=workunit_id) + + 
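+ # A second upload of the same file into the same workunit must fail, since a resource with these attributes already exists; the server's error message is asserted below.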
self.assertIn("Resource with the specified attribute combination already exists", str(error.exception)) + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/integration/test_bfabric2_bad_requests.py b/bfabric/tests/integration/test_bfabric2_bad_requests.py new file mode 100755 index 00000000..a45f45ed --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_bad_requests.py @@ -0,0 +1,54 @@ +import json +import os +import unittest + +from bfabric import Bfabric, BfabricAPIEngineType +from bfabric.errors import BfabricRequestError + + +class BfabricTestBadRequest(unittest.TestCase): + def setUp(self): + # Load ground truth + path = os.path.join(os.path.dirname(__file__), "groundtruth.json") + with open(path) as json_file: + self.ground_truth = json.load(json_file) + + # Create clients + self.clients = { + "zeep": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.SUDS), + } + + def _test_non_existing_read(self, engine_name: str): + # NOTE: Currently a bad read request simply returns no matches, but does not throw errors + res = self.clients[engine_name].read("user", {"id": "cat"}).to_list_dict() + self.assertEqual([], res) + + def _test_forbidden_save(self, engine_name: str): + # Test what happens if we save to an endpoint that does not accept saving + self.assertRaises(BfabricRequestError, self.clients[engine_name].save, "project", {"name": "TheForbiddenPlan"}) + + def _test_wrong_delete(self, engine_name: str): + self.assertRaises(RuntimeError, self.clients[engine_name].delete, "workunit", 101010101010101) + + def test_non_existing_read_when_suds(self): + self._test_non_existing_read("suds") + + def test_non_existing_read_when_zeep(self): + self._test_non_existing_read("zeep") + + def test_forbidden_save_when_suds(self): + self._test_forbidden_save("suds") + + def test_forbidden_save_when_zeep(self): + self._test_forbidden_save("zeep") + + def test_wrong_delete_when_suds(self): + self._test_wrong_delete("suds") + + def test_wrong_delete_when_zeep(self): + self._test_wrong_delete("zeep") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/integration/test_bfabric2_exists.py b/bfabric/tests/integration/test_bfabric2_exists.py new file mode 100644 index 00000000..d3a69c75 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_exists.py @@ -0,0 +1,20 @@ +import unittest + +from bfabric import BfabricAPIEngineType, Bfabric + + +class BfabricTestExists(unittest.TestCase): + def _test_single_exists(self, engine: BfabricAPIEngineType): + client = Bfabric.from_config("TEST", engine=engine) + res = client.exists("dataset", "id", 30721) + self.assertEqual(res, True) + + def test_zeep(self): + self._test_single_exists(engine=BfabricAPIEngineType.ZEEP) + + def test_suds(self): + self._test_single_exists(engine=BfabricAPIEngineType.SUDS) + + +if __name__ == "__main__": + pass diff --git a/bfabric/tests/integration/test_bfabric2_read.py b/bfabric/tests/integration/test_bfabric2_read.py new file mode 100755 index 00000000..8069be45 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_read.py @@ -0,0 +1,95 @@ +import json +import os +import unittest + +from bfabric import Bfabric, BfabricAuth, BfabricAPIEngineType + + +class BfabricTestRead(unittest.TestCase): + def setUp(self, *args, **kwargs): + # Load ground truth + path = os.path.join(os.path.dirname(__file__), "groundtruth.json") + with open(path) as json_file: + self.ground_truth = 
json.load(json_file) + + # Create clients + self.clients = { + "zeep": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.SUDS), + } + + def read(self, engine: str, endpoint: str): + """Executes read queries for `endpoint` and compares results with ground truth.""" + with self.subTest(engine=engine): + bf = self.clients[engine] + self.assertIn(endpoint, self.ground_truth) + for query, ground_truth in self.ground_truth[endpoint]: + res = bf.read(endpoint=endpoint, obj=query).to_list_dict() + + # print(query, res) + + self.assertEqual(len(res), 1) # Expecting only one query result in all cases + for gt_attr, gt_value in ground_truth.items(): + self.assertEqual(str(gt_value), str(res[0][gt_attr])) + + def _test_empty_project(self, engine: str): + with self.subTest(engine=engine): + bf = self.clients[engine] + res = bf.read(endpoint="project", obj={"name": "this project does not exist"}).to_list_dict() + self.assertEqual(res, []) + + def test_user(self): + self.read("suds", "user") + self.read("zeep", "user") + + def test_container(self): + self.read("suds", "container") + self.read("zeep", "container") + + def test_project(self): + self.read("suds", "project") + # self.read("zeep", "project") # FIXME: Zeep does not parse name correctly for project queries + + def test_project_when_not_exists(self): + self._test_empty_project("zeep") + self._test_empty_project("suds") + + def test_application(self): + self.read("suds", "application") + self.read("zeep", "application") + + def test_sample(self): + self.read("suds", "sample") + self.read("zeep", "sample") + + def test_workunit(self): + self.read("suds", "workunit") + self.read("zeep", "workunit") + + def test_resource(self): + self.read("suds", "resource") + self.read("zeep", "resource") + + def test_executable(self): + self.read("suds", "executable") + self.read("zeep", "executable") + + def test_annotation(self): + self.read("suds", "annotation") + self.read("zeep", "annotation") + + def test_invalid_auth(self): + auth = BfabricAuth(login=self.clients["suds"].auth.login, password="invalid_password") + clients = { + "zeep": Bfabric.from_config("TEST", auth=auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", auth=auth, engine=BfabricAPIEngineType.SUDS), + } + for engine, bf in clients.items(): + with self.subTest(engine=engine): + with self.assertRaises(RuntimeError) as e: + bf.read(endpoint="workunit", obj={}) + self.assertIn("Invalid login or password. 
Could not login.", str(e.exception)) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/integration/test_bfabric2_read_pagination.py b/bfabric/tests/integration/test_bfabric2_read_pagination.py new file mode 100644 index 00000000..135b76d6 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_read_pagination.py @@ -0,0 +1,59 @@ +import logging +import unittest +import pandas as pd + +from bfabric import BfabricAPIEngineType, Bfabric +from bfabric.bfabric import get_system_auth +from bfabric.results.pandas_helper import list_dict_to_df + + +def _calc_query(config, auth, engine: BfabricAPIEngineType, endpoint: str, max_results: int = 300) -> pd.DataFrame: + print("Sending query via", engine) + b = Bfabric(config, auth, engine=engine) + + response_class = b.read(endpoint, {}, max_results=max_results) + response_dict = response_class.to_list_dict(drop_empty=True) + return list_dict_to_df(response_dict) + + +class BfabricTestPagination(unittest.TestCase): + def setUp(self): + self.config, self.auth = get_system_auth(config_env="TEST") + + def test_composite_user(self): + endpoint = "user" + max_results = 300 + + # Test SUDS + print("Testing if SUDS returns the requested number of entries") + resp_df_suds = _calc_query(self.config, self.auth, BfabricAPIEngineType.SUDS, endpoint, max_results=max_results) + assert len(resp_df_suds.index) == max_results + + # Test ZEEP + print("Testing if ZEEP returns the requested number of entries") + resp_df_zeep = _calc_query(self.config, self.auth, BfabricAPIEngineType.ZEEP, endpoint, max_results=max_results) + assert len(resp_df_zeep.index) == max_results + + # Rename suds to remove underscores + # resp_df_suds.rename(columns={"_id": "id", "_classname": "classname"}, inplace=True) + + # Test that columns are exactly the same + print("Testing if SUDS and ZEEP parsed responses have the same root fields") + suds_cols = sorted(resp_df_suds.columns) + zeep_cols = sorted(resp_df_zeep.columns) + assert suds_cols == zeep_cols + + print("Testing if SUDS and ZEEP responses are the same field by field") + mismatch_cols = [] + for col_name in suds_cols: + if not resp_df_suds[col_name].equals(resp_df_zeep[col_name]): + mismatch_cols += [col_name] + + # TODO: Make the test strict if Zeep bug is ever resolved. + self.assertListEqual(["formerproject", "project"], mismatch_cols) + print("SUDS and ZEEP mismatch in", mismatch_cols, "(expected)") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + unittest.main() diff --git a/bfabric/tests/integration/test_bfabric2_save_delete.py b/bfabric/tests/integration/test_bfabric2_save_delete.py new file mode 100644 index 00000000..4bb09692 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_save_delete.py @@ -0,0 +1,104 @@ +from typing import Tuple +import unittest + +from bfabric import BfabricAPIEngineType, Bfabric +from bfabric.bfabric import get_system_auth +from bfabric.experimental.multi_query import MultiQuery + + +def _find_delete_existing_objects_by_name(b: Bfabric, endpoint: str, name_list: list) -> Tuple[list, list]: + """ + Checks if workunits with requested names exist. Attempts to delete the existing workunits + + :param b: Bfabric instance + :param endpoint: Endpoint + :param name_list: List of names to check + :return: Subset of workunit names that are found to exist, and deletion reports for those workunits + """ + + # 1. 
Check which objects exist + objs_exist = MultiQuery(b).exists_multi(endpoint, "name", name_list) + objs_exist_names = [name for i, name in enumerate(name_list) if objs_exist[i]] + + if len(objs_exist_names) == 0: + print("No", endpoint, "exists") + return [], [] + else: + print("Already exist:", objs_exist_names) + + ids_to_delete = [] + for name in objs_exist_names: + # 2.1 Get IDs of all existing workunits + response_dict = b.read(endpoint, {"name": name}).to_list_dict() + ids_this = [r["id"] for r in response_dict] + + print("--", name, "exist with ids", ids_this) + ids_to_delete += ids_this + + # Delete + delete_response_dict = b.delete(endpoint, ids_to_delete).to_list_dict(drop_empty=True) + print("Deletion results:", delete_response_dict) + + return objs_exist_names, delete_response_dict + + +class BfabricTestSaveDelete(unittest.TestCase): + def setUp(self): + self.config, self.auth = get_system_auth(config_env="TEST") + + def _save_delete_workunit(self, b: Bfabric, verbose: bool = False) -> None: + """ + Integration test. Attempts to create some work units, then delete them. + - We check whether, after creation, the workunits with the target names are found in the API, + and the control workunit is not found (because it is not created) + - We check whether the deletion of the created workunits is successful + + :param b: BFabric Instance + :param verbose: Verbosity + :return: + """ + + endpoint = "workunit" + workunit_names = ["MewThePokemon", "TomMGM", "MinkyLeChat"] + fake_name = "SpikeTheDog" + all_names = workunit_names + [fake_name] + + # 1. Find and delete any workunits with these names, if they already exist + print("Phase 1: Make sure to clean up workunits with target names, if they somehow already exist") + _find_delete_existing_objects_by_name(b, endpoint, all_names) + + # 2. Create some workunits + print("Phase 2: Creating the target units") + new_ids = [] + for name in workunit_names: + query = {"name": name, "applicationid": 2, "description": "is warm and fluffy", "containerid": 3000} + response = b.save("workunit", query).to_list_dict() # We do the conversion to drop underscores in SUDS + if verbose: + print(response[0]) + + self.assertEqual(len(response), 1, msg="Expected a single response from a single saved workunit") + new_ids += [response[0]["id"]] + + # 3. 
Find and delete any workunits with these names, now that they have been created + print("Phase 3: Finding and deleting the created work units, checking if they match expectation") + found_names, deleted_responses = _find_delete_existing_objects_by_name(b, endpoint, all_names) + + self.assertEqual( + found_names, workunit_names, msg="Expected the names found in the API to be the ones we just created" + ) + for resp, trg_id in zip(deleted_responses, new_ids): + self.assertEqual(len(resp), 1, msg="Deletion response format unexpected") + self.assertIn("deletionreport", resp, msg="Deletion response format unexpected") + self.assertEqual( + resp["deletionreport"], + "Workunit " + str(trg_id) + " removed successfully.", + msg="Deletion response format unexpected", + ) + + def test_zeep(self): + bZeep = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP) + self._save_delete_workunit(bZeep, verbose=True) + + def test_suds(self): + bSuds = Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS) + self._save_delete_workunit(bSuds, verbose=True) diff --git a/bfabric/tests/integration/test_bfabric2_time_inequality_query.py b/bfabric/tests/integration/test_bfabric2_time_inequality_query.py new file mode 100755 index 00000000..437496b6 --- /dev/null +++ b/bfabric/tests/integration/test_bfabric2_time_inequality_query.py @@ -0,0 +1,90 @@ +import unittest +from datetime import datetime, timedelta + + +from bfabric.bfabric2 import Bfabric, BfabricAPIEngineType, get_system_auth + + +class BfabricTestTimeInequalityQuery(unittest.TestCase): + def setUp(self): + # Load config and authentication + self.config, self.auth = get_system_auth(config_env="TEST") + + # Init the engines + self.clients = { + "zeep": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric(self.config, self.auth, engine=BfabricAPIEngineType.SUDS), + } + + def _test_created_before_after(self, engine: str): + with self.subTest(engine=engine): + n_resources = 50 + bf = self.clients[engine] + + # 0. Create a workunit + query = { + "name": "CatPetter9000", + "applicationid": 1, + "containerid": 3000, + "description": "Best cat petter ever", + } + res = bf.save("workunit", query).to_list_dict() + self.assertIsNotNone(res) + self.assertEqual(1, len(res)) + self.assertIn("id", res[0]) + workunit_id = res[0]["id"] + + # 1. Create a bunch of resources + resource_ids = [] + resource_created = [] + for i_resource in range(n_resources): + query = { + "name": "kitten_" + str(i_resource), + # 'sampleid': 1, + "filechecksum": 0, + "relativepath": "/catpath/kitten_" + str(i_resource), + "size": 0, + "status": "pending", + "storageid": 1, + "workunitid": workunit_id, + } + + res = bf.save("resource", query).to_list_dict() + self.assertIsNotNone(res) + self.assertEqual(len(res), 1) + self.assertIn("id", res[0]) + self.assertIn("created", res[0]) + + resource_ids += [res[0]["id"]] + resource_created += [datetime.fromisoformat(res[0]["created"])] + + # 2. attempt to find the resources we just created by datetime + # NOTE: + query = { + "workunitid": workunit_id, + "createdbefore": str(max(resource_created) + timedelta(seconds=1)), + "createdafter": str(min(resource_created)), + } + results = bf.read("resource", query, return_id_only=True).to_list_dict() + + # 3. delete all created resources. Do this before the checks, so that we do not leave undeleted resources behind if possible + bf.delete("resource", resource_ids) + bf.delete("workunit", workunit_id) + + # 4. 
Check that the found resources are the ones we created + # NOTE: We might find more resources, if somebody created resources at the same time as us + # Hence, we are testing for a subset, not an exact match + resource_ids_found = [r["id"] for r in results] + isSubset = set(resource_ids).issubset(set(resource_ids_found)) + # if not isSubset: + # print(min(resource_ids), max(resource_ids), set(resource_ids) - set(resource_ids_found), set(resource_ids_found) - set(resource_ids)) + + self.assertTrue(isSubset) + + def test_created(self): + self._test_created_before_after("suds") + self._test_created_before_after("zeep") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/integration/test_multi_methods.py b/bfabric/tests/integration/test_multi_methods.py new file mode 100644 index 00000000..db7c03f0 --- /dev/null +++ b/bfabric/tests/integration/test_multi_methods.py @@ -0,0 +1,79 @@ +import json +import unittest +from pathlib import Path + +from bfabric import Bfabric, BfabricAPIEngineType +from bfabric.experimental.multi_query import MultiQuery + + +class BfabricTestMulti(unittest.TestCase): + def setUp(self, *args, **kwargs): + # Load ground truth + path = Path(__file__).parent / "groundtruth.json" + with path.open() as json_file: + self.ground_truth = json.load(json_file) + + # Create clients + self.clients = { + "zeep": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.ZEEP), + "suds": Bfabric.from_config("TEST", engine=BfabricAPIEngineType.SUDS), + } + + def _test_multi_read_delete(self, engine: str): + """Creates many workunits + * Test if reading multiple of those workunits works + * Test if exists on multiple workunits works + * Test if deleting multiple workunits works + """ + bf: Bfabric = self.clients[engine] + mq = MultiQuery(bf) + + # 1. Create a bunch of workunits + # Note: we create more than 100, to make sure pagination works correctly + n_units = 105 + workunit_ids = [] + for i in range(n_units): + query = { + "name": f"fancy_workunit_{i}", + "applicationid": 2, + "description": "is very fancy", + "containerid": 3000, + } + res = bf.save("workunit", query).to_list_dict() + self.assertEqual(len(res), 1) + self.assertIn("id", res[0]) + workunit_ids += [res[0]["id"]] + + # 2. TODO: Make sure that the results are indeed read correctly, not just read + res = mq.read_multi("workunit", {}, "id", workunit_ids, return_id_only=True) + + # 3. Check if correct ones exist and fake one does not + res = mq.exists_multi("workunit", "id", workunit_ids + [10101010101010]) + self.assertEqual(len(res), n_units + 1) + self.assertTrue(all(res[:n_units])) + self.assertFalse(res[n_units]) + + # 4. Delete all workunits at the same time + res = mq.delete_multi("workunit", workunit_ids) + self.assertEqual(len(res), n_units) + + # TODO: Implement me + def _test_multi_read_complex(self, engine: str): + """ + The main idea is to test how the B-Fabric API behaves in case it is given multiple values for the same field, + where for each field there is more than one result. + * e.g. for 'id' there is only one result, but for 'status' there could be many + * a test could try to get all files with {'status': ['archived', 'archiving']} that have been recently created, + such that in total there are more than 100 results. 
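+ A hypothetical sketch of such a call, reusing the read_multi signature from above (not implemented, names illustrative): + mq.read_multi("resource", {"status": ["archived", "archiving"]}, "id", resource_ids) + where resource_ids would contain well over 100 entries.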
+ """ + pass + + def test_multi_delete_when_suds(self): + self._test_multi_read_delete("suds") + + def test_multi_delete_when_zeep(self): + self._test_multi_read_delete("zeep") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/old_integration/__init__.py b/bfabric/tests/old_integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/bfabric/tests/old_integration/__test_bfabric.py b/bfabric/tests/old_integration/__test_bfabric.py new file mode 100755 index 00000000..61bca83f --- /dev/null +++ b/bfabric/tests/old_integration/__test_bfabric.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +""" +unittest by +""" + +import unittest +from bfabric.bfabric_legacy import BfabricLegacy + +""" +ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py +""" + + +class BfabricTestCase(unittest.TestCase): + bfapp = BfabricLegacy(verbose=True) + + workunits = [] + samples = [] + + def workunit_save(self): + print("WORKUNIT SAVE") + for name in ["test1", "test2", "test3"]: + res = self.bfapp.save_object( + endpoint="workunit", + obj={ + "name": "unit test - {}".format(name), + "containerid": 3000, + "description": "68b329da9893e34099c7d8ad5cb9c940", + "applicationid": 217, + }, + ) + + self.workunits.append(res[0]._id) + print(res) + print(self.workunits) + + def workunit_read(self): + print("WORKUNIT READ") + res = [self.bfapp.delete_object(endpoint="workunit", id=x)[0] for x in self.workunits] + print(res) + self.assertEqual(len(res), len(self.workunits)) + + def workunit_delete(self): + print("WORKUNIT DELETE") + res = [self.bfapp.delete_object(endpoint="workunit", id=x)[0] for x in self.workunits] + print(res) + self.assertEqual(len(res), len(self.workunits)) + + def sample_save(self): + print("SAVE SAMPLE") + sample_type = "Biological Sample - Proteomics" + species = "n/a" + for name in ["test1", "test2", "test3"]: + res = self.bfapp.save_object( + endpoint="sample", + obj={ + "name": "unit test - {} - {}".format(name, sample_type), + "containerid": 3000, + "type": sample_type, + "species": species, + "samplingdate": "2017-10-12", + "groupingvar": "A", + "description": "68b329da9893e34099c7d8ad5cb9c940", + }, + ) + + # print(res[0]._id) + print("=== BEGIN DEBUG") + for i in res: + print(i) + self.samples.append(res[0]._id) + print("=== END DEBUG") + + def sample_delete(self): + print("SAMPLE DELETE") + print(self.samples) + res = [self.bfapp.delete_object(endpoint="sample", id=x)[0] for x in self.samples] + # res = [x for x in res if "removed successfully." 
in x.deletionreport] + print(res) + self.assertEqual(len(res), len(self.samples)) + + def test_workunit(self): + self.workunit_save() + self.workunit_read() + self.workunit_delete() + + def test_sample(self): + self.sample_save() + self.sample_delete() + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/groundtruth.json b/bfabric/tests/old_integration/groundtruth.json similarity index 100% rename from bfabric/tests/groundtruth.json rename to bfabric/tests/old_integration/groundtruth.json diff --git a/bfabric/tests/old_integration/test_bfabric_executable.py b/bfabric/tests/old_integration/test_bfabric_executable.py new file mode 100755 index 00000000..29f565e9 --- /dev/null +++ b/bfabric/tests/old_integration/test_bfabric_executable.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +""" +unittest by +""" + +import base64 +import unittest +import bfabric +import os +import json + +import bfabric.bfabric_legacy + + +class bfabricEncoder(json.JSONEncoder): + def default(self, o): + try: + return dict(o) + except TypeError: + pass + else: + return list(o) + return json.JSONEncoder.default(self, o) + + +class BfabricTestCase(unittest.TestCase): + + endpoint = {} + + def __init__(self, *args, **kwargs): + super(BfabricTestCase, self).__init__(*args, **kwargs) + + self.B = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) + + for e in ["executable", "sample", "application", "workunit", "resource"]: + self.endpoint[e] = [] + + def delete_endpoint_entries(self, endpoint=None): + res = [self.B.delete_object(endpoint=endpoint, id=x._id)[0] for x in self.endpoint[endpoint]] + # print(json.dumps(res, cls=bfabricEncoder, indent=2)) + res = [x for x in res if "removed successfully." in x.deletionreport] + self.assertEqual(len(res), len(self.endpoint[endpoint])) + + def test_executable(self, filename=os.path.abspath(__file__)): + wu_res = self.B.save_object( + endpoint="workunit", + obj={ + "name": "unit test - #{}.".format(1234), + "containerid": 3000, + "description": "unit test", + "applicationid": 61, + }, + ) + self.endpoint["workunit"].extend(wu_res[0]) + # print(json.dumps(wu_res, cls=bfabricEncoder, indent=2)) + # save + with open(filename, "r") as f: + executable = f.read() + + # executable = "echo 'hello, world!'" + input_executable = executable + + input_b64_executable = base64.b64encode(input_executable.encode()).decode() + + query = { + "name": "unit test", + "context": "WORKUNIT", + "parameter": { + "modifiable": "true", + "description": "will be ignored.", + "key": "argument1", + "label": "argument1", + "required": "true", + "type": "string", + "value": "PRX@fgcz-r-028", + }, + "workunitid": wu_res[0]._id, + "description": "python3 unit test executable.", + #'masterexecutableid': 11871, + "base64": input_b64_executable, + } + + self.endpoint["executable"].extend(self.B.save_object("executable", query)[0]) + + # read + for e in self.endpoint["executable"]: + res = self.B.read_object("executable", obj={"id": e._id}) + output_b64_executable = res[0].base64 + + output_executable = base64.b64decode(output_b64_executable.encode()).decode() + + self.assertEqual(input_b64_executable, output_b64_executable) + self.assertEqual(input_executable, output_executable) + + # delete + self.delete_endpoint_entries(endpoint="executable") + self.delete_endpoint_entries(endpoint="workunit") + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/test_bfabric_functional.py b/bfabric/tests/old_integration/test_bfabric_functional.py similarity index 65% rename from 
bfabric/tests/test_bfabric_functional.py rename to bfabric/tests/old_integration/test_bfabric_functional.py index c29261be..d6de2f50 100755 --- a/bfabric/tests/test_bfabric_functional.py +++ b/bfabric/tests/old_integration/test_bfabric_functional.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: latin1 -*- # Modified to use Slurm on November 9th 2020 @@ -15,53 +14,58 @@ import logging import time +import bfabric.bfabric_legacy +import bfabric.wrapper_creator.bfabric_submitter +import bfabric.wrapper_creator.bfabric_wrapper_creator -logging.basicConfig(filename="test_functional.log", - filemode='a', - format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s', - datefmt='%H:%M:%S', - level=logging.DEBUG) +logging.basicConfig( + filename="test_functional.log", + filemode="a", + format="%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s", + datefmt="%H:%M:%S", + level=logging.DEBUG, +) -class BfabricFunctionalTestCase(unittest.TestCase): +class BfabricFunctionalTestCase(unittest.TestCase): externaljobid = 0 def __init__(self, *args, **kwargs): super(BfabricFunctionalTestCase, self).__init__(*args, **kwargs) - def test_wrappercreator_submitter(self): logging.info("XXX start functional testing") - B = bfabric.Bfabric() + B = bfabric.bfabric_legacy.BfabricLegacy() logging.info("Running functional test on bfabricPy") msg = "This test case requires user 'pfeeder'." - self.assertEqual(B.auth.login, 'pfeeder', msg) + self.assertEqual(B.auth.login, "pfeeder", msg) msg = "This test case requires a bfabric test system!" self.assertIn("bfabric-test", B.config.base_url, msg) # TODO # create input resource - # 0. THIS IS ALL DONE PRIOR TO THE APPLICATION LAUNCH # 0.1 logging.info("Creating a new executable for the test application") - executable = B.save_object("executable", obj={"name": "exec_func_test", "context": "APPLICATION", "program": "/usr/bin/wc"}) + executable = B.save_object( + "executable", obj={"name": "exec_func_test", "context": "APPLICATION", "program": "/usr/bin/wc"} + ) try: if executable[0].errorreport: logging.error("Error while creating the executable") - logging.info('Errorreport present: {}'.format(executable[0].errorreport)) + logging.info("Errorreport present: {}".format(executable[0].errorreport)) raise except: - logging.info('Executable successfully created') + logging.info("Executable successfully created") try: executableid = int(executable[0]._id) logging.info("executableid = {}".format(executableid)) except: - logging.error('Error while getting the executable id') + logging.error("Error while getting the executable id") msg = "executableid should be a positive integer." self.assertTrue(executableid > 0, msg) @@ -73,46 +77,63 @@ def test_wrappercreator_submitter(self): # The executable for submitterid=11 has been created in the test system running the following script: # ./bfabric_upload_submitter_executable.py bfabric_executable_submitter_functionalTest.py slurm --name "Dummy_-_yaml___Slurm_executable" --description "test new submitter's parameters" # Note that the executable bfabric_executable_submitter_functionalTest.py only prints "Dummy submitter executable defined for the bfabricPy functional test". 
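+ # The application saved below ties the /usr/bin/wc executable created above to the pre-registered wrapper creator (wrappercreatorid=8) and the dummy submitter (submitterid=11) described in the comments above.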
- application = B.save_object("application", obj={"name": "appl_func_test", 'type': 'Analysis', 'technologyid': 2, 'description': "Application functional test", 'executableid': executableid, "wrappercreatorid": 8, "submitterid": 11, 'storageid': 1, 'outputfileformat': 'txt'}) - try: + application = B.save_object( + "application", + obj={ + "name": "appl_func_test", + "type": "Analysis", + "technologyid": 2, + "description": "Application functional test", + "executableid": executableid, + "wrappercreatorid": 8, + "submitterid": 11, + "storageid": 1, + "outputfileformat": "txt", + }, + ) + try: if application[0].errorreport: logging.error("Error while creating the application") - logging.info('Errorreport present: {}'.format(application[0].errorreport)) + logging.info("Errorreport present: {}".format(application[0].errorreport)) raise except: - logging.info('Application successfully created') + logging.info("Application successfully created") try: applicationid = int(application[0]._id) logging.info("applicationid = {}".format(applicationid)) except: - logging.error('Error while getting the application id') + logging.error("Error while getting the application id") raise msg = "applicationid should be a positive integer." self.assertTrue(applicationid > 0, msg) - - # 1. THIS CODE SNIPPET IS TRIGGERED BY THE BFABRIC SYSTEM AFTER THE USER RUN THE APPLICATION + # 1. THIS CODE SNIPPET IS TRIGGERED BY THE BFABRIC SYSTEM AFTER THE USER RUN THE APPLICATION # 1.1 logging.info("Creating new workunit connecting the test application executable to the execution environment") - workunit = B.save_object("workunit", - obj={"name": "unit test run - bfabricPy", - "status": "PENDING", 'containerid': 3061, - 'applicationid': applicationid, - 'description': "https://github.com/fgcz/bfabricPy/blob/iss27/bfabric/tests/test_bfabric_functional.py", - 'inputdatasetid': 32428}) + workunit = B.save_object( + "workunit", + obj={ + "name": "unit test run - bfabricPy", + "status": "PENDING", + "containerid": 3061, + "applicationid": applicationid, + "description": "https://github.com/fgcz/bfabricPy/blob/iss27/bfabric/tests/test_bfabric_functional.py", + "inputdatasetid": 32428, + }, + ) + try: if workunit[0].errorreport: - logging.error('Error while creating workunit') - logging.info('Errorreport present: {}'.format(workunit[0].errorreport)) + logging.error("Error while creating workunit") + logging.info("Errorreport present: {}".format(workunit[0].errorreport)) raise except: - logging.info('Workunit successfully created') + logging.info("Workunit successfully created") try: workunitid = int(workunit[0]._id) logging.info("workunit = {}".format(workunitid)) except: - logging.error('Error while getting the workunit id') + logging.error("Error while getting the workunit id") raise msg = "workunitid should be a positive integer." 
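+ # From here on, the test emulates the steps B-Fabric itself would trigger for this workunit: creating the wrapper creator externaljob, running the submitter, and finally setting the externaljob statuses to 'done'.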
@@ -123,14 +144,17 @@ def test_wrappercreator_submitter(self): logging.info("Creating new externaljob for the WrapperCreator executable") # Here a precomputed test executable is replacing the wrappercreatorid in the application definition wrapper_creator_executableid = 16374 - externaljob_wc = B.save_object("externaljob", obj={'workunitid': workunitid, 'action': 'CREATE', 'executableid': wrapper_creator_executableid}) + externaljob_wc = B.save_object( + "externaljob", + obj={"workunitid": workunitid, "action": "CREATE", "executableid": wrapper_creator_executableid}, + ) try: if externaljob_wc[0].errorreport: - logging.error('Error while creating externaljob_wc') - logging.info('Errorreport present: {}'.format(externaljob_wc[0].errorreport)) + logging.error("Error while creating externaljob_wc") + logging.info("Errorreport present: {}".format(externaljob_wc[0].errorreport)) raise except: - logging.info('Externaljob_wc successfully created') + logging.info("Externaljob_wc successfully created") try: externaljobid_wc = int(externaljob_wc[0]._id) logging.info("externaljob = {}".format(externaljobid_wc)) @@ -153,7 +177,7 @@ def test_wrappercreator_submitter(self): ## this information is contained in the application definition try: - W = bfabric.BfabricWrapperCreator(externaljobid=externaljobid_wc) + W = bfabric.wrapper_creator.bfabric_wrapper_creator.BfabricWrapperCreator(externaljobid=externaljobid_wc) W.write_yaml() # TODO(cp): write getter of executableid except: @@ -161,13 +185,17 @@ def test_wrappercreator_submitter(self): logging.info("Checking if wrapper creator's externaljob with id={} was set to 'done'".format(externaljobid_wc)) try: - res = B.read_object('externaljob', {'id': externaljobid_wc, 'status':'DONE'}) - self.assertEqual(res[0].status, 'done', 'set externaljob id={} of wrapper creator failed.'.format(externaljobid_wc)) + res = B.read_object("externaljob", {"id": externaljobid_wc, "status": "DONE"}) + self.assertEqual( + res[0].status, "done", "set externaljob id={} of wrapper creator failed.".format(externaljobid_wc) + ) except: logging.error("Error while setting wrapper creator's externaljob status to done") # 2.3 - logging.info("Fetching the id of the yaml_workunit_externaljob in order to set it as DONE at the end of this functional test") + logging.info( + "Fetching the id of the yaml_workunit_externaljob in order to set it as DONE at the end of this functional test" + ) try: # The method W.get_externaljobid_yaml_workunit() returns the external job with Action=WORKUNIT externaljobid_yaml_workunit = W.get_externaljobid_yaml_workunit() @@ -179,7 +207,9 @@ def test_wrappercreator_submitter(self): # 3.1 logging.info("Fetching the submitter's externaljob automatically triggered by B-Fabric") try: - externaljobid_submitter = B.read_object('externaljob', {'cliententityid': workunitid, "action": "SUBMIT", 'cliententityclass': 'Workunit'})[0]._id + externaljobid_submitter = B.read_object( + "externaljob", {"cliententityid": workunitid, "action": "SUBMIT", "cliententityclass": "Workunit"} + )[0]._id logging.info("externaljobid for submitter is {}.".format(externaljobid_submitter)) except: logging.error("Error while fetching the id of the submitter's externaljob") @@ -188,7 +218,9 @@ def test_wrappercreator_submitter(self): logging.info("Executing the Submitter executable: function submitter_yaml from BfabricSubmitter") # Submitter executable is supposed to download all workunit executables and submit them. 
# When finished successfully, the status of its external job is set to done, else to failed. - S = bfabric.BfabricSubmitter(externaljobid=externaljobid_submitter, SCHEDULEROOT="/usr/", scheduler="Slurm") + S = bfabric.wrapper_creator.bfabric_submitter.BfabricSubmitter( + externaljobid=externaljobid_submitter, SCHEDULEROOT="/usr/", scheduler="Slurm" + ) ## this information is contained in the application definition try: S.submitter_yaml() @@ -199,10 +231,12 @@ def test_wrappercreator_submitter(self): time.sleep(10) logging.info("Checking if submitter's externaljob with id={} was set to 'done'".format(externaljobid_submitter)) try: - #res = B.read_object('externaljob', {'id': externaljobid_submitter, 'status': 'DONE'}) - res = B.read_object('externaljob', {'id': externaljobid_submitter}) + # res = B.read_object('externaljob', {'id': externaljobid_submitter, 'status': 'DONE'}) + res = B.read_object("externaljob", {"id": externaljobid_submitter}) logging.info("Status of externaljob for submitter {}".format(res[0].status)) - self.assertEqual(res[0].status, 'done', 'submitter externaljob with id={} failed.'.format(externaljobid_submitter)) + self.assertEqual( + res[0].status, "done", "submitter externaljob with id={} failed.".format(externaljobid_submitter) + ) except: logging.error("Error while setting submitter externaljob status to DONE") raise @@ -210,10 +244,16 @@ def test_wrappercreator_submitter(self): # 4. SETTING YAML_WORKUNIT_EXTERNALJOB TO DONE logging.info("Setting the yaml_workunit_externaljob created by the WrapperCreator to 'done'") try: - res = B.save_object(endpoint='externaljob', obj={'id': externaljobid_yaml_workunit, 'status': 'done'}) - logging.info("Checking if WORKUNIT's externaljob with id={} was set to 'done'".format(externaljobid_yaml_workunit)) - res = B.read_object('externaljob', {'id': externaljobid_yaml_workunit, 'status':'DONE'}) - self.assertEqual(res[0].status, 'done', 'yaml_workunit_externaljob with id={} failed.'.format(externaljobid_yaml_workunit)) + res = B.save_object(endpoint="externaljob", obj={"id": externaljobid_yaml_workunit, "status": "done"}) + logging.info( + "Checking if WORKUNIT's externaljob with id={} was set to 'done'".format(externaljobid_yaml_workunit) + ) + res = B.read_object("externaljob", {"id": externaljobid_yaml_workunit, "status": "DONE"}) + self.assertEqual( + res[0].status, + "done", + "yaml_workunit_externaljob with id={} failed.".format(externaljobid_yaml_workunit), + ) except: logging.error("Error while setting yaml_workunit externaljob status to done") @@ -229,11 +269,10 @@ def test_wrappercreator_submitter(self): time.sleep(1) logging.info("end processing job.") - logging.info("Deleting superfluous resources of test run workunit.") - res = B.read_object('workunit', {'id', workunit[0]._id})[0] + res = B.read_object("workunit", {"id", workunit[0]._id})[0] for i in res.resource: - resdel = B.delete_object('resource', i._id) + resdel = B.delete_object("resource", i._id) self.assertIn("removed successfully", resdel[0].deletionreport) logging.info("deleted resource id={}.".format(i._id)) @@ -245,44 +284,42 @@ def test_wrappercreator_submitter(self): # 6. 
THIS LINE IS CALLED WHEN THE APPLICATION IS DONE logging.info(f"set workunit {workunitid} status available.") - res = B.save_object('workunit', {'id': workunitid, 'status': 'available'}) + res = B.save_object("workunit", {"id": workunitid, "status": "available"}) logging.info("Cleanup for the python test: whatever is possible to be removed") logging.info(f"trying to delete executable {executableid} [expect to fail].") - res = B.delete_object('executable', executableid) + res = B.delete_object("executable", executableid) self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete application {applicationid} [expect to fail; since we have a workunit].") - res = B.delete_object('application', applicationid) + res = B.delete_object("application", applicationid) self.assertNotIn("removed successfully", res[0].deletionreport) - logging.info(f"trying to delete submitter externaljob {externaljobid_submitter} [expect to fail].") - res = B.delete_object('externaljob', externaljobid_submitter) + res = B.delete_object("externaljob", externaljobid_submitter) msg = "should fail" self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete workunit {workunitid}.") - res = B.delete_object('workunit', workunitid) + res = B.delete_object("workunit", workunitid) self.assertIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete submitter executable {executableid} [expect to fail].") - res = B.delete_object('executable', executableid) + res = B.delete_object("executable", executableid) self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete wrapper creator externaljob {externaljobid_wc} [expect to fail].") - res = B.delete_object('externaljob', externaljobid_wc) + res = B.delete_object("externaljob", externaljobid_wc) self.assertNotIn("removed successfully", res[0].deletionreport) logging.info(f"trying to delete application {applicationid}.") - res = B.delete_object('application', applicationid) + res = B.delete_object("application", applicationid) self.assertIn("removed successfully", res[0].deletionreport) -if __name__ == '__main__': +if __name__ == "__main__": suite = unittest.TestSuite() - suite.addTest(BfabricFunctionalTestCase('test_wrappercreator_submitter')) + suite.addTest(BfabricFunctionalTestCase("test_wrappercreator_submitter")) runner = unittest.TextTestRunner(verbosity=1) - runner.run(suite ) - + runner.run(suite) diff --git a/bfabric/tests/test_bfabric_read.py b/bfabric/tests/old_integration/test_bfabric_read.py similarity index 94% rename from bfabric/tests/test_bfabric_read.py rename to bfabric/tests/old_integration/test_bfabric_read.py index efaae2f1..1c46206e 100755 --- a/bfabric/tests/test_bfabric_read.py +++ b/bfabric/tests/old_integration/test_bfabric_read.py @@ -9,6 +9,7 @@ import unittest import bfabric +import bfabric.bfabric_legacy class BfabricTestCaseReadEndPoints(unittest.TestCase): @@ -17,7 +18,7 @@ def setUpClass(cls): path = os.path.join(os.path.dirname(__file__), "groundtruth.json") with open(path) as json_file: cls.ground_truth = json.load(json_file) - cls.bfapp = bfabric.Bfabric(verbose=False) + cls.bfapp = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) def read(self, endpoint): """Executes read queries for `endpoint` and compares results with ground truth.""" diff --git a/bfabric/tests/old_integration/test_bfabric_sample.py b/bfabric/tests/old_integration/test_bfabric_sample.py new file mode 100755 index 00000000..21b5236d --- 
/dev/null +++ b/bfabric/tests/old_integration/test_bfabric_sample.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +""" +unittest by +""" + +import unittest +from bfabric.bfabric_legacy import BfabricLegacy + +""" +ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py +""" + + +class BfabricTestCase(unittest.TestCase): + + workunits = [] + samples = [] + + bfapp = BfabricLegacy(verbose=True) + + def sample_save(self): + print("SAVE SAMPLE") + sample_type = "Biological Sample - Proteomics" + species = "n/a" + for name in [1, 2, 3]: + res = self.bfapp.save_object( + endpoint="sample", + obj={ + "name": "unit test - #{} - {}".format(name, sample_type), + "containerid": 3000, + "type": sample_type, + "species": species, + "samplingdate": "2017-10-12", + "groupingvar": "A", + "description": "68b329da9893e34099c7d8ad5cb9c940", + }, + ) + + for i in res: + print(i) + # self.samples.append(res[0].id) + + def sample_delete(self): + print("SAMPLE DELETE") + print(self.samples) + res = [self.bfapp.delete_object(endpoint="sample", id=x)[0] for x in self.samples] + res = [x for x in res if "removed successfully." in x.deletionreport] + print(res) + self.assertEqual(len(res), len(self.samples)) + + def test_sample(self): + self.sample_save() + # self.sample_delete() + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/old_integration/test_bfabric_workunit.py b/bfabric/tests/old_integration/test_bfabric_workunit.py new file mode 100755 index 00000000..8f25cc07 --- /dev/null +++ b/bfabric/tests/old_integration/test_bfabric_workunit.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 + +""" +unittest by +""" + +import base64 +import unittest +import bfabric +import os +import json +import datetime + +import bfabric.bfabric_legacy + + +class bfabricEncoder(json.JSONEncoder): + def default(self, o): + try: + return dict(o) + except TypeError: + pass + else: + return list(o) + return json.JSONEncoder.default(self, o) + + +class BfabricTestCase(unittest.TestCase): + + endpoint = {} + + def __init__(self, *args, **kwargs): + super(BfabricTestCase, self).__init__(*args, **kwargs) + + self.bfapp = bfabric.bfabric_legacy.BfabricLegacy(verbose=False) + + for e in ["executable", "sample", "application", "workunit", "resource"]: + self.endpoint[e] = [] + + def resource_save(self, filename, workunitid): + with open(filename, "r") as f: + content = f.read() + + try: + resource_base64 = base64.b64encode(content.encode()) + except: + raise ValueError("error: could not encode content") + + res = self.bfapp.save_object( + "resource", + { + "base64": resource_base64, + "name": os.path.basename(filename), + "description": content, + "workunitid": workunitid, + }, + ) + + self.endpoint["resource"].extend(res[0]) + + def delete_endpoint_entries(self, endpoint=None): + res = [self.bfapp.delete_object(endpoint=endpoint, id=x._id)[0] for x in self.endpoint[endpoint]] + print(json.dumps(res, cls=bfabricEncoder, indent=2)) + res = [x for x in res if "removed successfully." 
in x.deletionreport] + self.assertEqual(len(res), len(self.endpoint[endpoint])) + + def _01_executable_save(self, filename=os.path.abspath(__file__)): + with open(filename, "r") as f: + executable = f.read() + + query = { + "name": "unit test", + "context": "APPLICATION", + "parameter": { + "modifiable": "true", + "description": "will be ignored.", + "key": "argument1", + "label": "argument1", + "required": "true", + "type": "string", + "value": "PRX@fgcz-r-028", + }, + "description": "python3 unit test executable.", + #'masterexecutableid': 11871, + "base64": base64.b64encode(executable.encode()), + } + + res = self.bfapp.save_object("executable", query)[0] + print(res) + self.endpoint["executable"].extend(res) + + def _02_sample_save(self): + sample_type = "Biological Sample - Proteomics" + species = "n/a" + for name in [1, 2, 3]: + res = self.bfapp.save_object( + endpoint="sample", + obj={ + "name": "unit test - #{}; {} {}".format(name, sample_type, datetime.datetime.now()), + "containerid": 3000, + "type": sample_type, + "species": species, + "samplingdate": "2017-10-12", + "groupingvar": "A", + "description": "68b329da9893e34099c7d8ad5cb9c940", + }, + ) + + print(res[0]) + self.endpoint["sample"].extend(res[0]) + + def _03_application_save(self): + query = { + "name": "unit test", + "description": "68b329da9893e34099c7d8ad5cb9c940", + "type": "Analysis", + "technologyid": 2, + } + + res = self.bfapp.save_object(endpoint="application", obj=query) + print(json.dumps(res, cls=bfabricEncoder, indent=2)) + self.endpoint["application"].extend(res[0]) + + def _04_workunit_save(self): + queue = range(1, 4) + try: + applicationid = self.endpoint["application"][0]._id + except: + applicationid = 61 + for j in queue: + res = self.bfapp.save_object( + endpoint="workunit", + obj={ + "name": "unit test - #{}.".format(j), + "containerid": bfabric.project, + "description": "68b329da9893e34099c7d8ad5cb9c940", + "applicationid": applicationid, + }, + ) + self.endpoint["workunit"].extend(res[0]) + print(json.dumps(self.endpoint["workunit"], cls=bfabricEncoder, indent=2)) + self.resource_save(os.path.abspath(__file__), res[0]._id) + + # self.assertEqual(len(queue), len(self.workunits)) + + def _98_statistics(self): + print("\nsummary:") + for k, v in self.endpoint.items(): + try: + res = [x._id for x in v] + print("{}\n\t{}".format(k, [x._id for x in v])) + except: + pass + + def test_01(self): + self._01_executable_save() + self._02_sample_save() + self._03_application_save() + self._04_workunit_save() + self._98_statistics() + + self.delete_endpoint_entries(endpoint="executable") + self.delete_endpoint_entries(endpoint="sample") + self.delete_endpoint_entries(endpoint="workunit") + # self.delete_endpoint_entries(endpoint='application') + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/test_bfabric_executable.py b/bfabric/tests/test_bfabric_executable.py deleted file mode 100755 index ca2bb294..00000000 --- a/bfabric/tests/test_bfabric_executable.py +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: latin1 -*- - -""" -unittest by -""" - -import base64 -import unittest -import bfabric -import os -import json - -class bfabricEncoder(json.JSONEncoder): - def default(self, o): - try: - return dict(o) - except TypeError: - pass - else: - return list(o) - return JSONEncoder.default(self, o) - -class BfabricTestCase(unittest.TestCase): - - endpoint = {} - - - def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) - 
- self.B = bfabric.Bfabric(verbose=False) - - for e in ['executable', 'sample', 'application', 'workunit', 'resource']: - self.endpoint[e] = [] - - def delete_endpoint_entries(self, endpoint=None): - res = [self.B.delete_object(endpoint=endpoint, id=x._id)[0] for x in self.endpoint[endpoint]] - # print(json.dumps(res, cls=bfabricEncoder, indent=2)) - res = [x for x in res if "removed successfully." in x.deletionreport] - self.assertEqual(len(res), len(self.endpoint[endpoint])) - - - def test_executable(self, filename=os.path.abspath(__file__)): - wu_res = self.B.save_object(endpoint='workunit', obj={'name': "unit test - #{}.".format(1234), - 'containerid': 3000, - 'description': 'unit test', - 'applicationid': 61 - }) - self.endpoint['workunit'].append(wu_res[0]) - # print(json.dumps(wu_res, cls=bfabricEncoder, indent=2)) - # save - with open(filename, 'r') as f: - executable = f.read() - - - #executable = "echo 'hello, world!'" - input_executable = executable - - input_b64_executable = base64.b64encode(input_executable.encode()).decode() - - query = { 'name': 'unit test', - 'context': 'WORKUNIT', - 'parameter': {'modifiable': 'true', - 'description': 'will be ignored.', - 'key': 'argument1', - 'label': 'argument1', - 'required': 'true', - 'type':'string', - 'value': 'PRX@fgcz-r-028'}, - 'workunitid': wu_res[0]._id, - 'description': 'python3 unit test executable.', - #'masterexecutableid': 11871, - 'base64': input_b64_executable } - - self.endpoint['executable'].append(self.B.save_object('executable', query)[0]) - - # read - for e in self.endpoint['executable']: - res = self.B.read_object('executable', obj={'id': e._id}) - output_b64_executable = res[0].base64 - - output_executable = base64.b64decode(output_b64_executable.encode()).decode() - - - self.assertEqual(input_b64_executable, output_b64_executable) - self.assertEqual(input_executable, output_executable) - - # delete - self.delete_endpoint_entries(endpoint='executable') - self.delete_endpoint_entries(endpoint='workunit') - - -if __name__ == '__main__': - unittest.main(verbosity=2) - diff --git a/bfabric/tests/test_bfabric_sample.py b/bfabric/tests/test_bfabric_sample.py deleted file mode 100755 index 867b0b25..00000000 --- a/bfabric/tests/test_bfabric_sample.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: latin1 -*- - -""" -unittest by -""" - -import unittest -from bfabric import Bfabric - -""" -ssh localhost "cat > /tmp/bb.py && /usr/bin/python /tmp/bb.py" < PycharmProjects/untitled/bfabric_wsdl.py -""" -class BfabricTestCase(unittest.TestCase): - - workunits = [] - samples = [] - - - bfapp = Bfabric(verbose=True) - def sample_save(self): - print("SAVE SAMPLE") - sample_type = 'Biological Sample - Proteomics' - species = "n/a" - for name in [1, 2, 3]: - res = self.bfapp.save_object(endpoint='sample', obj={'name': "unit test - #{} - {}".format(name, sample_type), - 'containerid': 3000, - 'type' : sample_type, - 'species' : species, - 'samplingdate' : "2017-10-12", - 'groupingvar' : "A", - 'description': '68b329da9893e34099c7d8ad5cb9c940' - }) - - for i in res: - print (i) - #self.samples.append(res[0].id) - - def sample_delete(self): - print("SAMPLE DELETE") - print(self.samples) - res = [self.bfapp.delete_object(endpoint='sample', id=x)[0] for x in self.samples] - res = [x for x in res if "removed successfully." 
in x.deletionreport] - print(res) - self.assertEqual(len(res), len(self.samples)) - - def test_sample(self): - self.sample_save() - # self.sample_delete() - -if __name__ == '__main__': - unittest.main() diff --git a/bfabric/tests/test_bfabric_workunit.py b/bfabric/tests/test_bfabric_workunit.py deleted file mode 100755 index 105045f6..00000000 --- a/bfabric/tests/test_bfabric_workunit.py +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: latin1 -*- - -""" -unittest by -""" - -import base64 -import unittest -import bfabric -import os -import json -import datetime - -class bfabricEncoder(json.JSONEncoder): - def default(self, o): - try: - return dict(o) - except TypeError: - pass - else: - return list(o) - return JSONEncoder.default(self, o) - -class BfabricTestCase(unittest.TestCase): - - endpoint = {} - - - def __init__(self, *args, **kwargs): - super(BfabricTestCase, self).__init__(*args, **kwargs) - - self.bfapp = bfabric.Bfabric(verbose=False) - - for e in ['executable', 'sample', 'application', 'workunit', 'resource']: - self.endpoint[e] = [] - - def resource_save(self, filename, workunitid): - with open(filename, 'r') as f: - content = f.read() - - try: - resource_base64 = base64.b64encode(content.encode()) - except: - raise ("error: could not encode content") - - res = self.bfapp.save_object('resource', - {'base64': resource_base64, - 'name': os.path.basename(filename), - 'description': content, - 'workunitid': workunitid}) - - self.endpoint['resource'].append(res[0]) - - - def delete_endpoint_entries(self, endpoint=None): - res = [ self.bfapp.delete_object(endpoint=endpoint, id=x._id)[0] for x in self.endpoint[endpoint] ] - print(json.dumps(res, cls=bfabricEncoder, indent=2)) - res = [x for x in res if "removed successfully." 
in x.deletionreport] - self.assertEqual(len(res), len(self.endpoint[endpoint])) - - def _01_executable_save(self, filename=os.path.abspath(__file__)): - with open(filename, 'r') as f: - executable = f.read() - - query = { 'name': 'unit test', - 'context': 'APPLICATION', - 'parameter': {'modifiable': 'true', - 'description': 'will be ignored.', - 'key': 'argument1', - 'label': 'argument1', - 'required': 'true', - 'type':'string', - 'value': 'PRX@fgcz-r-028'}, - 'description': 'python3 unit test executable.', - #'masterexecutableid': 11871, - 'base64': base64.b64encode(executable.encode()) } - - res = self.bfapp.save_object('executable', query)[0] - print (res) - self.endpoint['executable'].append(res) - - def _02_sample_save(self): - sample_type = 'Biological Sample - Proteomics' - species = "n/a" - for name in [1, 2, 3]: - res = self.bfapp.save_object(endpoint='sample', - obj={'name': "unit test - #{}; {} {}".format(name, sample_type, datetime.datetime.now()), - 'containerid': 3000, - 'type' : sample_type, - 'species' : species, - 'samplingdate' : "2017-10-12", - 'groupingvar' : "A", - 'description': '68b329da9893e34099c7d8ad5cb9c940' - }) - - print(res[0]) - self.endpoint['sample'].append(res[0]) - - - def _03_application_save(self): - query={'name': "unit test", - 'description': '68b329da9893e34099c7d8ad5cb9c940', - 'type': "Analysis", - 'technologyid' : 2 - } - - res = self.bfapp.save_object(endpoint='application', obj=query) - print(json.dumps(res, cls=bfabricEncoder, indent=2)) - self.endpoint['application'].append(res[0]) - - - def _04_workunit_save(self): - queue = range(1, 4) - try: - applicationid = self.endpoint['application'][0]._id - except: - applicationid = 61 - for j in queue: - res = self.bfapp.save_object(endpoint='workunit', obj={'name': "unit test - #{}.".format(j), - 'containerid': bfabric.project, - 'description': '68b329da9893e34099c7d8ad5cb9c940', - 'applicationid': applicationid - }) - self.endpoint['workunit'].append(res[0]) - print(json.dumps(self.endpoint['workunit'], cls=bfabricEncoder, indent=2)) - self.resource_save(os.path.abspath(__file__), res[0]._id) - - #self.assertEqual(len(queue), len(self.workunits)) - - - def _98_statistics(self): - print("\nsummary:") - for k, v in self.endpoint.items(): - try: - res = [x._id for x in v] - print ("{}\n\t{}".format(k, [x._id for x in v])) - except: - pass - - def test_01(self): - self._01_executable_save() - self._02_sample_save() - self._03_application_save() - self._04_workunit_save() - self._98_statistics() - - - self.delete_endpoint_entries(endpoint='executable') - self.delete_endpoint_entries(endpoint='sample') - self.delete_endpoint_entries(endpoint='workunit') - #self.delete_endpoint_entries(endpoint='application') - - -if __name__ == '__main__': - unittest.main(verbosity=2) - diff --git a/bfabric/tests/unit/example_config.yml b/bfabric/tests/unit/example_config.yml new file mode 100644 index 00000000..75e7eaec --- /dev/null +++ b/bfabric/tests/unit/example_config.yml @@ -0,0 +1,20 @@ +GENERAL: + default_config: PRODUCTION + +PRODUCTION: + login: my_epic_production_login + password: my_secret_production_password + base_url: https://mega-production-server.uzh.ch/myprod + +TEST: + login: my_epic_test_login + password: my_secret_test_password + base_url: https://mega-test-server.uzh.ch/mytest + application_ids: + Proteomics/CAT_123: 7 + Proteomics/DOG_552: 6 + Proteomics/DUCK_666: 12 + job_notification_emails: john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch + +STANDBY: + base_url: 
https://standby-server.uzh.ch/mystandby \ No newline at end of file diff --git a/bfabric/tests/unit/test_bfabric.py b/bfabric/tests/unit/test_bfabric.py new file mode 100644 index 00000000..4ed44ecf --- /dev/null +++ b/bfabric/tests/unit/test_bfabric.py @@ -0,0 +1,344 @@ +import datetime +import unittest +from functools import cached_property +from unittest.mock import MagicMock, patch, ANY, call + +from bfabric import Bfabric, BfabricAPIEngineType, BfabricConfig +from bfabric.engine.engine_suds import EngineSUDS + + +class TestBfabric(unittest.TestCase): + def setUp(self): + self.mock_config = MagicMock(name="mock_config", spec=BfabricConfig) + self.mock_auth = None + self.mock_engine_type = BfabricAPIEngineType.SUDS + self.mock_engine = MagicMock(name="mock_engine", spec=EngineSUDS) + + @cached_property + def mock_bfabric(self) -> Bfabric: + return Bfabric(config=self.mock_config, auth=self.mock_auth, engine=self.mock_engine_type) + + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_no_args(self, _mock_engine_suds, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config() + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + mock_get_system_auth.assert_called_once_with(config_env=None, config_path=None) + + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_explicit_auth(self, _mock_engine_suds, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_config_auth = MagicMock(name="mock_config_auth") + mock_get_system_auth.return_value = (mock_config, mock_config_auth) + client = Bfabric.from_config(config_env="TestingEnv", auth=mock_auth) + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + mock_get_system_auth.assert_called_once_with(config_env="TestingEnv", config_path=None) + + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_none_auth(self, _mock_engine_suds, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(config_env="TestingEnv", auth=None) + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + with self.assertRaises(ValueError) as error: + _ = client.auth + self.assertIn("Authentication not available", str(error.exception)) + mock_get_system_auth.assert_called_once_with(config_env="TestingEnv", config_path=None) + + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_engine_suds(self, mock_engine_suds, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(engine=BfabricAPIEngineType.SUDS) + + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + self.assertEqual(mock_engine_suds.return_value, client.engine) + mock_get_system_auth.assert_called_once_with(config_env=None, 
config_path=None) + + mock_engine_suds.assert_called_once_with(base_url=mock_config.base_url) + self.assertEqual(mock_engine_suds.return_value, client.engine) + + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineZeep") + def test_from_config_when_engine_zeep(self, mock_engine_zeep, mock_get_system_auth): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(engine=BfabricAPIEngineType.ZEEP) + + self.assertIsInstance(client, Bfabric) + self.assertEqual(mock_config, client.config) + self.assertEqual(mock_auth, client.auth) + self.assertEqual(mock_engine_zeep.return_value, client.engine) + mock_get_system_auth.assert_called_once_with(config_env=None, config_path=None) + + mock_engine_zeep.assert_called_once_with(base_url=mock_config.base_url) + self.assertEqual(mock_engine_zeep.return_value, client.engine) + + @patch.object(Bfabric, "print_version_message") + @patch("bfabric.bfabric.get_system_auth") + @patch("bfabric.bfabric.EngineSUDS") + def test_from_config_when_verbose(self, _mock_engine_suds, mock_get_system_auth, mock_print_version_message): + mock_config = MagicMock(name="mock_config") + mock_auth = MagicMock(name="mock_auth") + mock_get_system_auth.return_value = (mock_config, mock_auth) + client = Bfabric.from_config(verbose=True) + mock_print_version_message.assert_called_once_with() + + def test_query_counter(self): + self.assertEqual(0, self.mock_bfabric.query_counter) + + def test_config(self): + self.assertEqual(self.mock_config, self.mock_bfabric.config) + + def test_auth_when_missing(self): + with self.assertRaises(ValueError) as error: + _ = self.mock_bfabric.auth + self.assertIn("Authentication not available", str(error.exception)) + + def test_auth_when_provided(self): + self.mock_auth = MagicMock(name="mock_auth") + self.assertEqual(self.mock_auth, self.mock_bfabric.auth) + + def test_with_auth(self): + mock_old_auth = MagicMock(name="mock_old_auth") + mock_new_auth = MagicMock(name="mock_new_auth") + self.mock_auth = mock_old_auth + with self.mock_bfabric.with_auth(mock_new_auth): + self.assertEqual(mock_new_auth, self.mock_bfabric.auth) + self.assertEqual(mock_old_auth, self.mock_bfabric.auth) + + def test_with_auth_when_exception(self): + mock_old_auth = MagicMock(name="mock_old_auth") + mock_new_auth = MagicMock(name="mock_new_auth") + self.mock_auth = mock_old_auth + try: + with self.mock_bfabric.with_auth(mock_new_auth): + raise ValueError("Test exception") + except ValueError: + pass + self.assertEqual(mock_old_auth, self.mock_bfabric.auth) + + def test_read_when_no_pages_available_and_check(self): + self.mock_auth = MagicMock(name="mock_auth") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_result = MagicMock(name="mock_result", total_pages_api=0, assert_success=MagicMock()) + mock_engine.read.return_value = mock_result + result = self.mock_bfabric.read(endpoint="mock_endpoint", obj="mock_obj") + self.assertEqual(mock_result.get_first_n_results.return_value, result) + mock_engine.read.assert_called_once_with( + endpoint="mock_endpoint", obj="mock_obj", auth=self.mock_auth, page=1, return_id_only=False + ) + mock_result.assert_success.assert_called_once_with() + mock_result.get_first_n_results.assert_called_once_with(100) + + @patch("bfabric.bfabric.compute_requested_pages") + def test_read_when_pages_available_and_check(self, mock_compute_requested_pages): + self.mock_auth = 
MagicMock(name="mock_auth") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_page_results = [ + MagicMock( + name="mock_page_result_1", + assert_success=MagicMock(), + total_pages_api=3, + errors=[], + ), + MagicMock( + name="mock_page_result_2", + assert_success=MagicMock(), + total_pages_api=3, + errors=[], + ), + MagicMock( + name="mock_page_result_3", + assert_success=MagicMock(), + total_pages_api=3, + errors=[], + ), + ] + mock_page_results[0].__getitem__.side_effect = lambda i: [1, 2, 3, 4, 5][i] + mock_page_results[1].__getitem__.side_effect = lambda i: [6, 7, 8, 9, 10][i] + mock_page_results[2].__getitem__.side_effect = lambda i: [11, 12, 13, 14, 15][i] + + mock_engine.read.side_effect = lambda **kwargs: mock_page_results[kwargs["page"] - 1] + mock_compute_requested_pages.return_value = ([1, 2], 4) + + result = self.mock_bfabric.read(endpoint="mock_endpoint", obj="mock_obj") + + mock_compute_requested_pages.assert_called_once_with( + n_page_total=3, + n_item_per_page=100, + n_item_offset=0, + n_item_return_max=100, + ) + self.assertListEqual([], result.errors) + self.assertListEqual( + [ + call.read( + endpoint="mock_endpoint", obj="mock_obj", auth=self.mock_auth, page=1, return_id_only=False + ), + call.read( + endpoint="mock_endpoint", obj="mock_obj", auth=self.mock_auth, page=2, return_id_only=False + ), + ], + mock_engine.mock_calls, + ) + self.assertEqual(6, len(result)) + self.assertEqual(5, result[0]) + self.assertEqual(10, result[5]) + + def test_save_when_no_auth(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + with patch.object(self.mock_bfabric, "engine") as mock_engine: + with self.assertRaises(ValueError) as error: + self.mock_bfabric.save(endpoint, obj) + self.assertEqual("Authentication not available", str(error.exception)) + mock_engine.save.assert_not_called() + + def test_save_when_auth_and_check_false(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.save.return_value.assert_success = method_assert_success + result = self.mock_bfabric.save(endpoint, obj, check=False) + self.assertEqual(mock_engine.save.return_value, result) + method_assert_success.assert_not_called() + mock_engine.save.assert_called_once_with(endpoint=endpoint, obj=obj, auth=self.mock_auth) + + def test_save_when_auth_and_check_true(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.save.return_value.assert_success = method_assert_success + result = self.mock_bfabric.save(endpoint, obj) + self.assertEqual(mock_engine.save.return_value, result) + method_assert_success.assert_called_once_with() + mock_engine.save.assert_called_once_with(endpoint=endpoint, obj=obj, auth=self.mock_auth) + + def test_delete_when_no_auth(self): + endpoint = "test_endpoint" + obj = {"key": "value"} + with patch.object(self.mock_bfabric, "engine") as mock_engine: + with self.assertRaises(ValueError) as error: + self.mock_bfabric.delete(endpoint, obj) + self.assertEqual("Authentication not available", str(error.exception)) + mock_engine.delete.assert_not_called() + + def test_delete_when_auth_and_check_false(self): + endpoint = "test_endpoint" + self.mock_auth = 
MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.delete.return_value.assert_success = method_assert_success + result = self.mock_bfabric.delete(endpoint=endpoint, id=10, check=False) + self.assertEqual(mock_engine.delete.return_value, result) + method_assert_success.assert_not_called() + mock_engine.delete.assert_called_once_with(endpoint=endpoint, id=10, auth=self.mock_auth) + + def test_delete_when_auth_and_check_true(self): + endpoint = "test_endpoint" + self.mock_auth = MagicMock(name="mock_auth") + method_assert_success = MagicMock(name="method_assert_success") + with patch.object(self.mock_bfabric, "engine") as mock_engine: + mock_engine.delete.return_value.assert_success = method_assert_success + result = self.mock_bfabric.delete(endpoint=endpoint, id=10) + self.assertEqual(mock_engine.delete.return_value, result) + method_assert_success.assert_called_once_with() + mock_engine.delete.assert_called_once_with(endpoint=endpoint, id=10, auth=self.mock_auth) + + @patch.object(Bfabric, "read") + def test_exists_when_true(self, method_read): + method_read.return_value.__len__.return_value = 1 + self.assertTrue(self.mock_bfabric.exists(endpoint="test_endpoint", key="key", value="value")) + method_read.assert_called_once_with( + endpoint="test_endpoint", obj={"key": "value"}, max_results=1, check=True, return_id_only=True + ) + + @patch.object(Bfabric, "read") + def test_exists_when_true_and_extra_args(self, method_read): + method_read.return_value.__len__.return_value = 1 + self.assertTrue( + self.mock_bfabric.exists( + endpoint="test_endpoint", key="key", value="value", query={"extra": "arg"}, check=False + ) + ) + method_read.assert_called_once_with( + endpoint="test_endpoint", + obj={"key": "value", "extra": "arg"}, + max_results=1, + check=False, + return_id_only=True, + ) + + @patch.object(Bfabric, "read") + def test_exists_when_false(self, method_read): + method_read.return_value.__len__.return_value = 0 + self.assertFalse(self.mock_bfabric.exists(endpoint="test_endpoint", key="key", value="value")) + method_read.assert_called_once_with( + endpoint="test_endpoint", obj={"key": "value"}, max_results=1, check=True, return_id_only=True + ) + + @patch.object(Bfabric, "save") + def test_upload_resource(self, method_save): + resource_name = "hello_world.txt" + content = b"Hello, World!" 
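+        # Editorial note: the "SGVsbG8sIFdvcmxkIQ==" asserted below is simply
+        # base64.b64encode(b"Hello, World!"), i.e. upload_resource is expected
+        # to base64-encode the raw bytes before saving the resource.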
+ workunit_id = 123 + check = MagicMock(name="check") + self.mock_bfabric.upload_resource(resource_name, content, workunit_id, check) + method_save.assert_called_once_with( + endpoint="resource", + obj={ + "base64": "SGVsbG8sIFdvcmxkIQ==", + "workunitid": 123, + "name": "hello_world.txt", + "description": "base64 encoded file", + }, + check=check, + ) + + def test_get_version_message(self): + self.mock_config.base_url = "dummy_url" + message = self.mock_bfabric.get_version_message() + lines = message.split("\n") + self.assertEqual(2, len(lines)) + # first line + pattern = r"--- bfabricPy v\d+\.\d+\.\d+ \(EngineSUDS, dummy_url, U=None\) ---" + self.assertRegex(lines[0], pattern) + # second line + year = datetime.datetime.now().year + self.assertEqual(f"--- Copyright (C) 2014-{year} Functional Genomics Center Zurich ---", lines[1]) + + @patch("bfabric.bfabric.Console") + @patch.object(Bfabric, "get_version_message") + def test_print_version_message(self, method_get_version_message, mock_console): + mock_stderr = MagicMock(name="mock_stderr") + self.mock_bfabric.print_version_message(stderr=mock_stderr) + mock_console.assert_called_once_with(stderr=mock_stderr, highlighter=ANY, theme=ANY) + mock_console.return_value.print.assert_called_once_with( + method_get_version_message.return_value, style="bright_yellow" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/bfabric/tests/unit/test_bfabric_config.py b/bfabric/tests/unit/test_bfabric_config.py index bf04eef5..ce976a44 100644 --- a/bfabric/tests/unit/test_bfabric_config.py +++ b/bfabric/tests/unit/test_bfabric_config.py @@ -1,7 +1,8 @@ -import io +import os import unittest +from pathlib import Path -from bfabric.bfabric_config import BfabricConfig, BfabricAuth, parse_bfabricrc_py +from bfabric.bfabric_config import BfabricAuth, BfabricConfig, read_config class TestBfabricAuth(unittest.TestCase): @@ -22,9 +23,22 @@ def setUp(self): base_url="url", application_ids={"app": 1}, ) + self.example_config_path = Path(__file__).parent / "example_config.yml" - def test_with_overrides(self): - new_config = self.config.with_overrides( + def test_default_params_when_omitted(self): + config = BfabricConfig() + self.assertEqual("https://fgcz-bfabric.uzh.ch/bfabric", config.base_url) + self.assertEqual({}, config.application_ids) + self.assertEqual("", config.job_notification_emails) + + def test_default_params_when_specified(self): + config = BfabricConfig(base_url=None, application_ids=None, job_notification_emails=None) + self.assertEqual("https://fgcz-bfabric.uzh.ch/bfabric", config.base_url) + self.assertEqual({}, config.application_ids) + self.assertEqual("", config.job_notification_emails) + + def test_copy_with_overrides(self): + new_config = self.config.copy_with( base_url="new_url", application_ids={"new": 2}, ) @@ -33,51 +47,81 @@ def test_with_overrides(self): self.assertEqual("url", self.config.base_url) self.assertEqual({"app": 1}, self.config.application_ids) - def test_with_replaced_when_none(self): - new_config = self.config.with_overrides(base_url=None, application_ids=None) + def test_copy_with_replaced_when_none(self): + new_config = self.config.copy_with(base_url=None, application_ids=None) self.assertEqual("url", new_config.base_url) self.assertEqual({"app": 1}, new_config.application_ids) self.assertEqual("url", self.config.base_url) self.assertEqual({"app": 1}, self.config.application_ids) - def test_read_bfabricrc_py(self): - input_text = ( - "# Some comment\n" - "_LOGIN = login\n" - "_PASSWD = 'user'\n" - "_UKNOWNKEY 
= 'value'\n"
-            "# Another comment\n"
-            """_WEBBASE = "url"\n"""
-            """_APPLICATION = {"app": 1}\n"""
-            """_JOB_NOTIFICATION_EMAILS = "email1 email2"\n"""
-        )
-        file = io.StringIO(input_text)
-        setattr(file, "name", "/file")
+    # Testing default initialization
+    # TODO: Test that logging is consistent with initialization
+    def test_read_yml_bypath_default(self):
+        # Ensure the environment variable is not set, so that the default environment is loaded
+        os.environ.pop("BFABRICPY_CONFIG_ENV", None)
+
+        config, auth = read_config(self.example_config_path)
+        self.assertEqual("my_epic_production_login", auth.login)
+        self.assertEqual("my_secret_production_password", auth.password)
+        self.assertEqual("https://mega-production-server.uzh.ch/myprod", config.base_url)
+
+    # Testing environment variable initialization
+    # TODO: Test that logging is consistent with default config
+    def test_read_yml_bypath_environment_variable(self):
+        # Explicitly set the environment variable for this process
+        os.environ["BFABRICPY_CONFIG_ENV"] = "TEST"
+
+        config, auth = read_config(self.example_config_path)
+        self.assertEqual("my_epic_test_login", auth.login)
+        self.assertEqual("my_secret_test_password", auth.password)
+        self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url)
+
+    # Testing explicit initialization, as well as extra fields (application_ids, job_notification_emails)
+    # TODO: Test that logging is consistent with default config
+    def test_read_yml_bypath_all_fields(self):
         with self.assertLogs(level="INFO") as log_context:
-            config, auth = parse_bfabricrc_py(file)
-        self.assertEqual("login", auth.login)
-        self.assertEqual("user", auth.password)
-        self.assertEqual("url", config.base_url)
-        self.assertEqual({"app": 1}, config.application_ids)
-        self.assertEqual("email1 email2", config.job_notification_emails)
-        self.assertEqual(
-            [
-                "INFO:bfabric.bfabric_config:Reading configuration from: /file"
-            ],
-            log_context.output,
-        )
+            config, auth = read_config(self.example_config_path, config_env="TEST")
+
+        # # Testing log
+        # self.assertEqual(
+        #     [
+        #         "INFO:bfabric.bfabric_config:Reading configuration from: example_config.yml"
+        #         "INFO:bfabric.bfabric_config:config environment specified explicitly as TEST"
+        #     ],
+        #     log_context.output,
+        # )
+
+        self.assertEqual("my_epic_test_login", auth.login)
+        self.assertEqual("my_secret_test_password", auth.password)
+        self.assertEqual("https://mega-test-server.uzh.ch/mytest", config.base_url)
+
+        applications_dict_ground_truth = {
+            "Proteomics/CAT_123": 7,
+            "Proteomics/DOG_552": 6,
+            "Proteomics/DUCK_666": 12,
+        }
-    def test_read_bfabricrc_py_when_empty(self):
-        input_text = ""
-        file = io.StringIO(input_text)
-        setattr(file, "name", "/file")
+        job_notification_emails_ground_truth = "john.snow@fgcz.uzh.ch billy.the.kid@fgcz.ethz.ch"
+
+        self.assertEqual(applications_dict_ground_truth, config.application_ids)
+        self.assertEqual(job_notification_emails_ground_truth, config.job_notification_emails)
+
+    # Testing that we can load base_url without authentication if correctly requested
+    def test_read_yml_when_empty_optional(self):
         with self.assertLogs(level="INFO"):
-            config, auth = parse_bfabricrc_py(file)
+            config, auth = read_config(self.example_config_path, config_env="STANDBY")
+        self.assertIsNone(auth)
-        self.assertEqual("https://fgcz-bfabric.uzh.ch/bfabric", config.base_url)
+        self.assertEqual("https://standby-server.uzh.ch/mystandby", config.base_url)
         self.assertEqual({}, config.application_ids)
         self.assertEqual("", config.job_notification_emails)
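+
+    # Editorial sketch (hypothetical shell session, not part of the test suite):
+    # read_config falls back to GENERAL.default_config when BFABRICPY_CONFIG_ENV
+    # is not set, as exercised above.
+    #
+    #   $ export BFABRICPY_CONFIG_ENV=TEST
+    #   $ python -c "from bfabric.bfabric_config import read_config; \
+    #                print(read_config('example_config.yml')[0].base_url)"
+    #   https://mega-test-server.uzh.ch/mytest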
+
+    # TODO delete if no mandatory fields are reintroduced
+    # Test that missing authentication will raise an error if required
+    # def test_read_yml_when_empty_mandatory(self):
+    #     with self.assertRaises(BfabricConfigError):
+    #         read_config(self.example_config_path, config_env="STANDBY")
+
     def test_repr(self):
         rep = repr(self.config)
         self.assertEqual(
diff --git a/bfabric/tests/unit/test_dict_helper.py b/bfabric/tests/unit/test_dict_helper.py
new file mode 100644
index 00000000..ee136824
--- /dev/null
+++ b/bfabric/tests/unit/test_dict_helper.py
@@ -0,0 +1,15 @@
+import unittest
+
+from bfabric.results.response_format_dict import sort_dict
+
+
+class BfabricTestSortDict(unittest.TestCase):
+    def test_sort_dict(self):
+        # Main purpose of dictionary sorting is that they appear consistent when printed
+        d = {"c": 5, "b": 10}
+        d_sorted = sort_dict(d)
+        self.assertEqual(str(d_sorted), "{'b': 10, 'c': 5}")
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_math_helper.py b/bfabric/tests/unit/test_math_helper.py
new file mode 100644
index 00000000..0f81be22
--- /dev/null
+++ b/bfabric/tests/unit/test_math_helper.py
@@ -0,0 +1,15 @@
+import unittest
+
+import bfabric.utils.math_helper as math_helper
+
+
+class BfabricTestMath(unittest.TestCase):
+    def test_integer_division(self):
+        # Integer ceiling division should round up whenever there is a remainder
+        self.assertEqual(math_helper.div_int_ceil(120, 100), 2)
+        self.assertEqual(math_helper.div_int_ceil(200, 100), 2)
+        self.assertEqual(math_helper.div_int_ceil(245, 100), 3)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_paginator.py b/bfabric/tests/unit/test_paginator.py
new file mode 100644
index 00000000..c65a40fb
--- /dev/null
+++ b/bfabric/tests/unit/test_paginator.py
@@ -0,0 +1,82 @@
+import unittest
+
+import bfabric.utils.paginator as paginator
+
+
+class BfabricTestBasicPagination(unittest.TestCase):
+    def test_page_iter(self):
+        # page_iter splits the query objects into chunks of at most page_size items
+        data = list(range(123))
+
+        rez = list(paginator.page_iter(data, page_size=100))
+        self.assertEqual(len(rez), 2)
+        self.assertEqual(rez[0], list(range(100)))
+        self.assertEqual(rez[1], list(range(100, 123)))
+
+    def test_compute_requested_pages_when_no_offset(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=None
+        )
+        self.assertListEqual([1, 2, 3, 4, 5], pages)
+        self.assertEqual(0, init_offset)
+
+    def test_compute_requested_pages_when_offset_2(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=2, n_item_return_max=None
+        )
+        self.assertListEqual([1, 2, 3, 4, 5], pages)
+        self.assertEqual(2, init_offset)
+
+    def test_compute_requested_pages_when_offset_3(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=3, n_item_return_max=None
+        )
+        self.assertListEqual([2, 3, 4, 5], pages)
+        self.assertEqual(0, init_offset)
+
+    def test_compute_requested_pages_when_offset_4(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=4, n_item_return_max=None
+        )
+        self.assertListEqual([2, 3, 4, 5], pages)
+        self.assertEqual(1, init_offset)
+
+    def test_compute_requested_pages_when_offset_6(self):
+        pages, init_offset = 
paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=6, n_item_return_max=None
+        )
+        self.assertListEqual([3, 4, 5], pages)
+        self.assertEqual(0, init_offset)
+
+    def test_compute_requested_pages_when_offset_out_of_bounds(self):
+        # TODO maybe it should yield an error?
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=100, n_item_return_max=None
+        )
+        self.assertListEqual([], pages)
+        self.assertEqual(0, init_offset)
+
+    def test_compute_requested_pages_when_max(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=10
+        )
+        self.assertListEqual([1, 2, 3, 4], pages)
+        self.assertEqual(0, init_offset)
+
+    def test_compute_requested_pages_when_max_9(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=9
+        )
+        self.assertListEqual([1, 2, 3], pages)
+        self.assertEqual(0, init_offset)
+
+    def test_compute_requested_pages_when_max_6(self):
+        pages, init_offset = paginator.compute_requested_pages(
+            n_page_total=5, n_item_per_page=3, n_item_offset=0, n_item_return_max=6
+        )
+        self.assertListEqual([1, 2], pages)
+        self.assertEqual(0, init_offset)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_pandas_helper.py b/bfabric/tests/unit/test_pandas_helper.py
new file mode 100644
index 00000000..a65fcba3
--- /dev/null
+++ b/bfabric/tests/unit/test_pandas_helper.py
@@ -0,0 +1,25 @@
+import unittest
+import numpy as np
+
+import bfabric.results.pandas_helper as pandas_helper
+
+
+class BfabricTestPandasHelper(unittest.TestCase):
+    def test_list_dict_to_df(self):
+        # Missing keys should appear as NaN, and non-scalar values should be stringified
+        example_list_dict = [
+            {"cat": 1, "dog": 2},
+            {"cat": 3, "rat": ["a", "b"]},
+            {"rat": 5},
+            {"cat": 1, "dog": 2, "rat": 7},
+        ]
+
+        df = pandas_helper.list_dict_to_df(example_list_dict)
+        self.assertEqual(list(df.columns), ["cat", "dog", "rat"])
+        np.testing.assert_equal(list(df["cat"]), [1, 3, np.nan, 1])
+        np.testing.assert_equal(list(df["dog"]), [2, np.nan, np.nan, 2])
+        np.testing.assert_equal(list(df["rat"]), [np.nan, "['a', 'b']", 5, 7])
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/tests/unit/test_response_format_dict.py b/bfabric/tests/unit/test_response_format_dict.py
new file mode 100644
index 00000000..9fe1765b
--- /dev/null
+++ b/bfabric/tests/unit/test_response_format_dict.py
@@ -0,0 +1,42 @@
+import unittest
+
+import bfabric.results.response_format_dict as response_format_dict
+
+
+class BfabricTestResponseFormatDict(unittest.TestCase):
+    def test_drop_empty_elements(self):
+        # Should delete all hierarchical instances of key-value pairs where the value is None or empty (e.g. an empty list)
+        input_list_dict = [{"a": [], "b": [1, {"aa": 14, "gg": None}], "c": []}, {"zz": None, "uu": "cat"}]
+        target_list_dict = [{"b": [1, {"aa": 14}]}, {"uu": "cat"}]
+
+        output_list_dict = response_format_dict.drop_empty_elements(input_list_dict, inplace=False)
+        self.assertEqual(output_list_dict, target_list_dict)
+
+    def test_map_element_keys(self):
+        # Main use is to delete underscores in specific keys
+        input_list_dict = [{"a": [], "b": [1, {"_aa": 14, "gg": None}], "c": []}, {"zz": None, "uu": "cat"}]
+        target_list_dict = [{"a": [], "b": [1, {"aa": 14, "gg": None}], "c": []}, {"zz": None, "uu": "cat"}]
+
+        output_list_dict = 
response_format_dict.map_element_keys(input_list_dict, {"_aa": "aa"}, inplace=False) + self.assertEqual(output_list_dict, target_list_dict) + + def test_sort_dicts_by_key(self): + # NOTE: The main purpose of sorting is to ensure consistent string representation + input_list_dict = [{"b": 1, "a": 2, "c": 3}, {"dog": 25, "cat": [1, 2, 3]}] + target_list_dict = [{"a": 2, "b": 1, "c": 3}, {"cat": [1, 2, 3], "dog": 25}] + + output_list_dict = response_format_dict.sort_dicts_by_key(input_list_dict, inplace=False) + self.assertEqual(str(output_list_dict), str(target_list_dict)) + + def test_clean_result(self): + result_input = [{"b": 1, "a": 2, "_id": 3}, {"b": 4, "_id": 5, "a": 6}] + cleaned = response_format_dict.clean_result(result_input, drop_underscores_suds=True, sort_keys=True) + self.assertEqual(repr([{"a": 2, "b": 1, "id": 3}, {"a": 6, "b": 4, "id": 5}]), repr(cleaned)) + self.assertEqual( + repr([{"b": 1, "a": 2, "_id": 3}, {"b": 4, "_id": 5, "a": 6}]), + repr(result_input), + ) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/bfabric/tests/unit/test_result_container.py b/bfabric/tests/unit/test_result_container.py new file mode 100644 index 00000000..6c24aa1e --- /dev/null +++ b/bfabric/tests/unit/test_result_container.py @@ -0,0 +1,98 @@ +import logging +import unittest + +import polars.testing + +from bfabric.results.result_container import ResultContainer + + +class BfabricTestResultContainer(unittest.TestCase): + def setUp(self): + self.res1 = ResultContainer([1, 2, 3], total_pages_api=1) + self.res2 = ResultContainer([4, 5], total_pages_api=1) + self.res_with_empty = ResultContainer([{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}]) + + def test_str(self): + self.assertEqual("[1, 2, 3]", str(self.res1)) + self.assertEqual("[4, 5]", str(self.res2)) + + def test_repr(self): + self.assertEqual("[1, 2, 3]", repr(self.res1)) + self.assertEqual("[4, 5]", repr(self.res2)) + + def test_iter(self): + items = list(iter(self.res1)) + self.assertListEqual([1, 2, 3], items) + + def test_len(self): + self.assertEqual(3, len(self.res1)) + self.assertEqual(2, len(self.res2)) + + def test_getitem(self): + self.assertEqual(3, self.res1[2]) + self.assertEqual(4, self.res2[0]) + + def test_get_first_n_results_when_available(self): + res3 = self.res1.get_first_n_results(2) + self.assertEqual(2, len(res3)) + self.assertEqual([1, 2], res3.results) + + def test_get_first_n_results_when_not_available(self): + res3 = self.res1.get_first_n_results(4) + self.assertEqual(3, len(res3)) + self.assertEqual([1, 2, 3], res3.results) + + def test_get_first_n_results_when_none(self): + res3 = self.res1.get_first_n_results(None) + self.assertEqual(3, len(res3)) + self.assertEqual([1, 2, 3], res3.results) + + def test_assert_success_when_success(self): + self.res1.assert_success() + + def test_assert_success_when_error(self): + self.res1.errors.append("MockedError") + with self.assertRaises(RuntimeError) as error: + self.res1.assert_success() + self.assertEqual("('Query was not successful', ['MockedError'])", str(error.exception)) + + def test_extend_when_same_lengths(self): + res1 = ResultContainer([{"a": 1}, {"a": 2}], total_pages_api=5) + res2 = ResultContainer([{"b": 3}, {"b": 4}], total_pages_api=5) + res1.extend(res2) + self.assertEqual(4, len(res1)) + self.assertEqual([{"a": 1}, {"a": 2}, {"b": 3}, {"b": 4}], res1.results) + self.assertEqual(5, res1.total_pages_api) + + def test_extend_when_different_lengths(self): + res3 = ResultContainer( + list(range(200, 400)), + 
total_pages_api=2,
+        )
+        with self.assertLogs(level=logging.WARNING) as error:
+            res3.extend(self.res1)
+
+        self.assertEqual(203, len(res3))
+        self.assertEqual(list(range(200, 400)) + [1, 2, 3], res3.results)
+        self.assertEqual(2, res3.total_pages_api)
+        self.assertIn("Results observed with different total pages counts: 2 != 1", str(error))
+
+    def test_to_list_dict_when_not_drop_empty(self):
+        expected = [{"a": None, "b": 1, "c": []}, {"a": 2, "b": 3, "c": None}]
+        with self.subTest(case="default"):
+            self.assertListEqual(expected, self.res_with_empty.to_list_dict())
+        with self.subTest(case="explicit"):
+            self.assertListEqual(expected, self.res_with_empty.to_list_dict(drop_empty=False))
+
+    def test_to_list_dict_when_drop_empty(self):
+        expected = [{"b": 1}, {"a": 2, "b": 3}]
+        self.assertListEqual(expected, self.res_with_empty.to_list_dict(drop_empty=True))
+
+    def test_to_polars(self):
+        res = ResultContainer([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
+        df = res.to_polars()
+        polars.testing.assert_frame_equal(polars.DataFrame({"a": [1, 3], "b": [2, 4]}), df)
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/bfabric/utils/__init__.py b/bfabric/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/utils/math_helper.py b/bfabric/utils/math_helper.py
new file mode 100644
index 00000000..7e20278f
--- /dev/null
+++ b/bfabric/utils/math_helper.py
@@ -0,0 +1,9 @@
+def div_int_ceil(n: int, d: int) -> int:
+    """Integer ceiling division.
+    Theoretically equivalent to math.ceil(n / d), but not subject to floating-point errors.
+    :param n: Numerator
+    :param d: Denominator
+    :return: The smallest integer greater than or equal to n / d
+    """
+    q, r = divmod(n, d)
+    return q + bool(r)
diff --git a/bfabric/utils/paginator.py b/bfabric/utils/paginator.py
new file mode 100644
index 00000000..f20312b3
--- /dev/null
+++ b/bfabric/utils/paginator.py
@@ -0,0 +1,50 @@
+from __future__ import annotations
+
+import math
+from collections.abc import Iterator
+
+# Single page query limit for BFabric API (as of time of writing, adapt if it changes)
+BFABRIC_QUERY_LIMIT = 100
+
+
+def page_iter(objs: list, page_size: int = BFABRIC_QUERY_LIMIT) -> Iterator[list]:
+    """
+    :param objs: A list of objects to provide to bfabric as part of a query
+    :param page_size: Number of objects per page
+    :return: An iterator over chunks that would be sent to bfabric, 1 chunk per query
+    """
+
+    for i in range(0, len(objs), page_size):
+        yield objs[i : i + page_size]
+
+
+def compute_requested_pages(
+    n_page_total: int,
+    n_item_per_page: int,
+    n_item_offset: int,
+    n_item_return_max: int | None,
+) -> tuple[list[int], int]:
+    """Returns the page indices that need to be requested to get all requested items.
+    :param n_page_total: Total number of pages available
+    :param n_item_per_page: Number of items per page
+    :param n_item_offset: Number of items to skip from the beginning
+    :param n_item_return_max: Maximum number of items to return
+    :return:
+        - list of page indices that need to be requested
+        - initial page offset (0-based), i.e. the i-th item from which onwards to retain results
+    """
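+    # Worked example (editorial, mirroring the unit tests): with n_page_total=5,
+    # n_item_per_page=3 and n_item_offset=4, pages [2, 3, 4, 5] are requested
+    # and the initial offset is 1, i.e. the first item of page 2 is skipped.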
+    # B-Fabric API uses 1-based indexing for pages
+    index_start = 1
+
+    # If n_item_return_max is not provided, we will return all items
+    if n_item_return_max is None:
+        n_item_return_max = n_page_total * n_item_per_page
+
+    # Determine the page indices to request
+    idx_max_return = math.ceil((n_item_return_max + n_item_offset) / n_item_per_page)
+    idx_arr = [idx + index_start for idx in range(n_item_offset // n_item_per_page, min(n_page_total, idx_max_return))]
+
+    # Determine the initial offset on the first page
+    initial_offset = min(n_item_offset, n_item_return_max) % n_item_per_page
+
+    return idx_arr, initial_offset
diff --git a/bfabric/wrapper_creator/__init__.py b/bfabric/wrapper_creator/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/bfabric/wrapper_creator/bfabric_external_job.py b/bfabric/wrapper_creator/bfabric_external_job.py
new file mode 100644
index 00000000..20642ddf
--- /dev/null
+++ b/bfabric/wrapper_creator/bfabric_external_job.py
@@ -0,0 +1,82 @@
+import json
+
+from bfabric.bfabric_legacy import bfabricEncoder, BfabricLegacy
+
+
+class BfabricExternalJob(BfabricLegacy):
+    """
+    ExternalJobs can use logging.
+    If you have a valid externaljobid, use this class instead of Bfabric.
+
+    TODO check if an external job id is provided
+    """
+
+    externaljobid = None
+
+    def __init__(self, login=None, password=None, externaljobid=None):
+        super(BfabricExternalJob, self).__init__(login, password)
+        if not externaljobid:
+            raise ValueError("Error: no externaljobid provided.")
+        else:
+            self.externaljobid = externaljobid
+
+        print(("BfabricExternalJob externaljobid={}".format(self.externaljobid)))
+
+    def logger(self, msg):
+        if self.externaljobid:
+            super(BfabricExternalJob, self).save_object("externaljob", {"id": self.externaljobid, "logthis": str(msg)})
+        else:
+            print((str(msg)))
+
+    def save_object(self, endpoint, obj, debug=None):
+        res = super(BfabricExternalJob, self).save_object(endpoint, obj, debug)
+        jsonres = json.dumps(res, cls=bfabricEncoder, sort_keys=True, indent=2)
+        self.logger("saved " + endpoint + "=" + str(jsonres))
+        return res
+
+    def get_workunitid_of_externaljob(self):
+        print(("DEBUG get_workunitid_of_externaljob self.externaljobid={}".format(self.externaljobid)))
+        res = self.read_object(endpoint="externaljob", obj={"id": self.externaljobid})[0]
+        print(res)
+        print("DEBUG END")
+        workunit_id = None
+        try:
+            workunit_id = res.cliententityid
+            print(("workunitid={}".format(workunit_id)))
+        except:
+            pass
+        return workunit_id
+
+    def get_application_name(self):
+        workunitid = self.get_workunitid_of_externaljob()
+        if workunitid is None:
+            raise ValueError("no workunit available for the given externaljobid.")
+        workunit = self.read_object(endpoint="workunit", obj={"id": workunitid})[0]
+        if workunit is None:
+            raise ValueError("ERROR: no workunit available for the given externaljobid.")
+        assert isinstance(workunit._id, int)
+        application = self.read_object("application", obj={"id": workunit.application._id})[0]
+        return application.name.replace(" ", "_")
+
+    def get_executable_of_externaljobid(self):
+        """
+        Takes an `externaljobid` and fetches the corresponding `executables`
+        out of the bfabric system via wsdl.
+        Returns a list of executables.
+
+        todo: this function should check if base64 is provided or
+        just a program.
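+
+        For illustration (this mirrors BfabricSubmitter.submitter_yaml): each
+        returned executable carries a ``base64`` attribute which consumers
+        decode via ``base64.b64decode(executable.base64.encode()).decode()``.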
+ """ + workunitid = self.get_workunitid_of_externaljob() + if workunitid is None: + return None + + executables = list() + for executable in self.read_object(endpoint="executable", obj={"workunitid": workunitid}): + if hasattr(executable, "base64"): + executables.append(executable) + + return executables if len(executables) > 0 else None diff --git a/bfabric/wrapper_creator/bfabric_feeder.py b/bfabric/wrapper_creator/bfabric_feeder.py new file mode 100644 index 00000000..03e5d4c7 --- /dev/null +++ b/bfabric/wrapper_creator/bfabric_feeder.py @@ -0,0 +1,46 @@ +import hashlib +import os + +from bfabric.bfabric_legacy import BfabricLegacy + + +class BfabricFeeder(BfabricLegacy): + """ + this class is used for reporting 'resource' status + """ + + def report_resource(self, resourceid): + """ + this function determines the 'md5 checksum', 'the file size', + and set the status of the resource available. + + this is gonna executed on the storage host + + """ + res = self.read_object("resource", {"id": resourceid})[0] + print(res) + + if not hasattr(res, "storage"): + return -1 + + storage = self.read_object("storage", {"id": res.storage._id})[0] + + filename = "{0}/{1}".format(storage.basepath, res.relativepath) + + if os.path.isfile(filename): + try: + fmd5 = hashlib.md5(open(filename, "rb").read()).hexdigest() + print("md5sum ({}) = {}".format(filename, fmd5)) + + fsize = int(os.path.getsize(filename)) + 1 + print("size ({}) = {}".format(filename, fsize)) + + return self.save_object( + "resource", {"id": resourceid, "size": fsize, "status": "available", "filechecksum": fmd5} + ) + except: + print("computing md5 failed") + # print ("{} {}".format(Exception, err)) + raise + + return self.save_object("resource", {"id": resourceid, "status": "failed"}) diff --git a/bfabric/wrapper_creator/bfabric_submitter.py b/bfabric/wrapper_creator/bfabric_submitter.py new file mode 100644 index 00000000..aab29ff0 --- /dev/null +++ b/bfabric/wrapper_creator/bfabric_submitter.py @@ -0,0 +1,290 @@ +import base64 + +import yaml + +import bfabric.wrapper_creator.gridengine as gridengine +import bfabric.wrapper_creator.slurm as slurm +from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob + + +class BfabricSubmitter: + """ + the class is used by the submitter which is executed by the bfabric system. 
+ """ + + (G, B) = (None, None) + + workunitid = None + workunit = None + parameters = None + execfilelist = [] + slurm_dict = { + "MaxQuant_textfiles_sge": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "1G"}, + "fragpipe": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "256G"}, + "MaxQuant": {"partition": "maxquant", "nodelist": "fgcz-r-033", "memory": "4G"}, + "scaffold_generic": {"partition": "scaffold", "nodelist": "fgcz-r-033", "memory": "256G"}, + "MSstats dataProcess": {"partition": "prx", "nodelist": "fgcz-r-033", "memory": "64G"}, + "MaxQuant_sampleSizeEstimation": {"partition": "prx", "nodelist": "fgcz-r-028", "memory": "2G"}, + "ProteomeDiscovererQC": {"partition": "prx", "nodelist": "fgcz-r-035", "memory": "2G"}, + } + + def __init__( + self, + login=None, + password=None, + externaljobid=None, + user="*", + node="PRX@fgcz-r-018", + partition="prx", + nodelist="fgcz-r-028", + memory="10G", + SCHEDULEROOT="/export/bfabric/bfabric/", + scheduler="GridEngine", + ): + """ + :rtype : object + """ + self.B = BfabricExternalJob(login=login, password=password, externaljobid=externaljobid) + self.partition = partition + self.nodelist = nodelist + self.memory = memory + self.SCHEDULEROOT = SCHEDULEROOT + self.user = user + self.scheduler = scheduler + + print(self.B.auth.login) + print(self.B.externaljobid) + + self.workunitid = self.B.get_workunitid_of_externaljob() + + try: + self.workunit = self.B.read_object(endpoint="workunit", obj={"id": self.workunitid})[0] + except: + print("ERROR: could not fetch workunit while calling constructor in BfabricSubmitter.") + raise + + try: + self.parameters = [ + self.B.read_object(endpoint="parameter", obj={"id": x._id})[0] for x in self.workunit.parameter + ] + except: + self.parameters = list() + print("Warning: could not fetch parameter.") + + partition = [x for x in self.parameters if x.key == "partition"] + nodelist = [x for x in self.parameters if x.key == "nodelist"] + memory = [x for x in self.parameters if x.key == "memory"] + application_name = self.B.get_application_name() + + if len(partition) > 0 and len(nodelist) > 0 and len(memory) > 0: + self.partition = partition[0].value + self.nodelist = nodelist[0].value + self.memory = memory[0].value + elif "queue" in [x.key for x in self.parameters] and application_name in self.slurm_dict: + # Temporary check for old workunit previously run with SGE + self.partition = self.slurm_dict[application_name]["partition"] + self.nodelist = self.slurm_dict[application_name]["nodelist"] + self.memory = self.slurm_dict[application_name]["memory"] + else: + pass + + print(("partition={0}".format(self.partition))) + print(("nodelist={0}".format(self.nodelist))) + print(("memory={0}".format(self.memory))) + print("__init__ DONE") + + def submit_gridengine(self, script="/tmp/runme.bash", arguments=""): + + GE = gridengine.GridEngine(user=self.user, queue=self.queue, GRIDENGINEROOT=self.SCHEDULEROOT) + + print(script) + print((type(script))) + resQsub = GE.qsub(script=script, arguments=arguments) + + self.B.logger("{}".format(resQsub)) + + def submit_slurm(self, script="/tmp/runme.bash", arguments=""): + + SL = slurm.SLURM(user=self.user, SLURMROOT=self.SCHEDULEROOT) + + print(script) + print((type(script))) + resSbatch = SL.sbatch(script=script, arguments=arguments) + + self.B.logger("{}".format(resSbatch)) + + def compose_bash_script(self, configuration=None, configuration_parser=lambda x: yaml.safe_load(x)): + """ + composes the bash script which is executed by the submitter (sun 
grid engine).
+        As argument it takes a configuration file (e.g. yaml, xml, or json) and a parser function.
+
+        It returns a str object containing the code.
+
+        :rtype: str
+        """
+
+        # assert isinstance(configuration, str)
+
+        try:
+            config = configuration_parser(configuration)
+        except:
+            raise ValueError("error: parsing configuration content failed.")
+
+        _cmd_template = """#!/bin/bash
+# Maria d'Errico
+# Christian Panse
+# 2020-09-28
+# 2020-09-29
+# https://GitHub.com/fgcz/bfabricPy/
+# Slurm
+#SBATCH --partition={0}
+#SBATCH --nodelist={11}
+#SBATCH -n 1
+#SBATCH -N 1
+#SBATCH --cpus-per-task=1
+#SBATCH --mem-per-cpu={12}
+#SBATCH -e {1}
+#SBATCH -o {2}
+#SBATCH --job-name=WU{10}
+#SBATCH --workdir=/home/bfabric
+#SBATCH --export=ALL,HOME=/home/bfabric

+# Grid Engine Parameters
+#$ -q {0}&{11}
+#$ -e {1}
+#$ -o {2}


+set -e
+set -o pipefail

+export EMAIL="{job_notification_emails}"
+export EXTERNALJOB_ID={3}
+export RESSOURCEID_OUTPUT={4}
+export RESSOURCEID_STDOUT_STDERR="{5} {6}"
+export OUTPUT="{7}"
+export WORKUNIT_ID="{10}"
+STAMP=`/bin/date +%Y%m%d%H%M`.$$.$JOB_ID
+TEMPDIR="/home/bfabric/prx"

+_OUTPUT=`echo $OUTPUT | cut -d"," -f1`
+test $? -eq 0 && _OUTPUTHOST=`echo $_OUTPUT | cut -d":" -f1`
+test $? -eq 0 && _OUTPUTPATH=`echo $_OUTPUT | cut -d":" -f2`
+test $? -eq 0 && _OUTPUTPATH=`dirname $_OUTPUTPATH`
+test $? -eq 0 && ssh $_OUTPUTHOST "mkdir -p $_OUTPUTPATH"
+test $? -eq 0 && echo $$ > $TEMPDIR/$$
+test $? -eq 0 && scp $TEMPDIR/$$ $OUTPUT

+if [ $? -eq 1 ];
+then
+    echo "writing to output url failed!";
+    exit 1;
+fi

+# job configuration set by B-Fabrics wrapper_creator executable
+# application parameter/configuration
+cat > $TEMPDIR/config_WU$WORKUNIT_ID.yaml <<EOF
+{8}
+EOF

+## interrupt here if you want to do a semi-automatic processing
+if [ -x /usr/bin/mutt ];
+then
+    ## yes, we prefer mutt at the FGCZ
+    cat $0 > $TEMPDIR/$JOB_ID.bash

+    (who am i; hostname; uptime; echo $0; pwd; ps;) \
+    | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID" $EMAIL \
+    -a $TEMPDIR/$JOB_ID.bash $TEMPDIR/config_WU$WORKUNIT_ID.yaml
+fi
+# exit 0

+# run the application
+test -f $TEMPDIR/config_WU$WORKUNIT_ID.yaml && {9} $TEMPDIR/config_WU$WORKUNIT_ID.yaml


+if [ $? -eq 0 ];
+then
+    ssh fgcz-r-035.uzh.ch "bfabric_setResourceStatus_available.py $RESSOURCEID_OUTPUT" \
+    | mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID DONE" $EMAIL

+    bfabric_save_workflowstep.py $WORKUNIT_ID
+    bfabric_setExternalJobStatus_done.py $EXTERNALJOB_ID
+    bfabric_setWorkunitStatus_available.py $WORKUNIT_ID
+    echo $?
+else
+    echo "application failed"
+    mutt -s "JOB_ID=$JOB_ID WORKUNIT_ID=$WORKUNIT_ID EXTERNALJOB_ID=$EXTERNALJOB_ID failed" $EMAIL < /dev/null
+    bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR $RESSOURCEID;
+    exit 1;
+fi

+# should also be available as zero byte files
+bfabric_setResourceStatus_available.py $RESSOURCEID_STDOUT_STDERR


+exit 0
+""".format(
+            self.partition,
+            config["job_configuration"]["stderr"]["url"],
+            config["job_configuration"]["stdout"]["url"],
+            config["job_configuration"]["external_job_id"],
+            config["job_configuration"]["output"]["resource_id"],
+            config["job_configuration"]["stderr"]["resource_id"],
+            config["job_configuration"]["stdout"]["resource_id"],
+            ",".join(config["application"]["output"]),
+            configuration,
+            config["job_configuration"]["executable"],
+            config["job_configuration"]["workunit_id"],
+            self.nodelist,
+            self.memory,
+            job_notification_emails=self.B.config.job_notification_emails,
+        )
+
+        return _cmd_template
+
+    def submitter_yaml(self):
+        """
+        Implements the default submitter.
+
+        The function fetches the yaml base64 configuration file linked to the external job id out of the B-Fabric
+        system. Since the file cannot be staged to the LRMS as an argument, we copy the yaml file into the bash script
+        and stage it when the application is executed.
+
+        TODO(cp): create the output url before the application is started.
+
+        return None
+        """
+
+        # foreach (executable in external job):
+        for executable in self.B.get_executable_of_externaljobid():
+            self.B.logger("executable = {0}".format(executable))
+
+            try:
+                content = base64.b64decode(executable.base64.encode()).decode()
+            except:
+                raise ValueError("error: decoding executable.base64 failed.")
+
+            print(content)
+            _cmd_template = self.compose_bash_script(
+                configuration=content, configuration_parser=lambda x: yaml.safe_load(x)
+            )
+
+            _bash_script_filename = "/home/bfabric/prx/workunitid-{0}_externaljobid-{1}_executableid-{2}.bash".format(
+                self.B.get_workunitid_of_externaljob(), self.B.externaljobid, executable._id
+            )
+
+            with open(_bash_script_filename, "w") as f:
+                f.write(_cmd_template)
+
+            if self.scheduler == "GridEngine":
+                self.submit_gridengine(_bash_script_filename)
+            else:
+                self.submit_slurm(_bash_script_filename)
+            self.execfilelist.append(_bash_script_filename)
+
+        res = self.B.save_object(endpoint="externaljob", obj={"id": self.B.externaljobid, "status": "done"})
+
+    def get_job_script(self):
+        return self.execfilelist
diff --git a/bfabric/wrapper_creator/bfabric_wrapper_creator.py b/bfabric/wrapper_creator/bfabric_wrapper_creator.py
new file mode 100644
index 00000000..819bd984
--- /dev/null
+++ b/bfabric/wrapper_creator/bfabric_wrapper_creator.py
@@ -0,0 +1,365 @@
+import base64
+import datetime
+import json
+import os
+
+import yaml
+
+from bfabric.bfabric_legacy import bfabricEncoder
+from bfabric.wrapper_creator.bfabric_external_job import BfabricExternalJob
+
+
+class BfabricWrapperCreator(BfabricExternalJob):
+    """
+    This class is used for the wrapper_creator, which is executed by the bfabric system
+    (non-batch), so each resource is processed separately.
+    """
+
+    (externaljobid_submitter, workunit_executableid) = (None, None)
+
+    def get_externaljobid_yaml_workunit(self):
+        return self.externaljobid_yaml_workunit
+
+    def uploadGridEngineScript(self, para={"INPUTHOST": "fgcz-r-035.uzh.ch"}):
+        """
+        This method creates and uploads an executable.
+        """
+
+        self.warning(
+            "This python method is superfluous and will be removed. Please use the write_yaml method of the BfabricWrapperCreator class."
+        )
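+
+        # Editorial note: `para` is expanded below into shell variable
+        # assignments of the form KEY="value" (format argument {0}); the
+        # default INPUTHOST above is only an illustrative example.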
+
+        _cmd_template = """#!/bin/bash
+# $HeadURL: http://fgcz-svn.uzh.ch/repos/scripts/trunk/linux/bfabric/apps/python/bfabric/bfabric.py $
+# $Id: bfabric.py 3000 2017-08-18 14:18:30Z cpanse $
+# Christian Panse
+#$ -q PRX@fgcz-r-028
+#$ -e {1}
+#$ -o {2}

+set -e
+set -o pipefail


+# debug
+hostname
+uptime
+echo $0
+pwd

+# variables to be set by the wrapper_creator executable
+{0}


+# create output directory
+ssh $SSHARGS $OUTPUTHOST "mkdir -p $OUTPUTPATH" || exit 1

+# staging input and output data and proc
+ssh $SSHARGS $INPUTHOST "cat $INPUTPATH/$INPUTFILE" \\
+| $APPLICATION --inputfile $INPUTFILE --ssh "$OUTPUTHOST:$OUTPUTPATH/$OUTPUTFILE" \\
+&& bfabric_setResourceStatus_available.py $RESSOURCEID \\
+&& bfabric_setExternalJobStatus_done.py $EXTERNALJOBID \\
+|| exit 1

+exit 0
+""".format(
+            "\n".join(sorted(['%s="%s"' % (key, info) for key, info in para.items()])),
+            para["STDERR"],
+            para["STDOUT"],
+        )
+
+        resExecutable = self.save_object(
+            "executable",
+            {
+                "name": os.path.basename(para["APPLICATION"]) + "_executable",
+                "context": "WORKUNIT",
+                "parameter": None,
+                "description": "This script should run as 'bfabric' user in the FGCZ compute infrastructure.",
+                "workunitid": para["WORKUNITID"],
+                "base64": base64.b64encode(_cmd_template.encode()),
+                "version": 0.2,
+            },
+        )
+
+        return resExecutable
+
+    def get_executableid(self):
+        return self.workunit_executableid
+
+    def write_yaml(self, data_serializer=lambda x: yaml.dump(x, default_flow_style=False, encoding=None)):
+        """
+        This method writes all related parameters into a yaml file, which is then uploaded as a base64 encoded
+        file into the b-fabric system.
+
+        If the method does not raise an exception, it also reports the status of the external_job at the end.
+
+        TODO(cp): make this function more generic so that it can also export xml, json, yaml, ...
+ """ + + # Inherits all parameters of the application executable out of B-Fabric to create an executable script + workunitid = self.get_workunitid_of_externaljob() + + if workunitid is None: + raise ValueError("no workunit available for the given externaljobid.") + + workunit = self.read_object(endpoint="workunit", obj={"id": workunitid})[0] + if workunit is None: + raise ValueError("ERROR: no workunit available for the given externaljobid.") + + assert isinstance(workunit._id, int) + + application = self.read_object("application", obj={"id": workunit.application._id})[0] + # TODO(cp): rename to application_execuatbel + workunit_executable = self.read_object("executable", obj={"id": workunit.applicationexecutable._id})[0] + try: + self.workunit_executableid = workunit_executable._id + except: + self.workunit_executableid = None + + # Get container details + container = workunit.container + fastasequence = "" + if container._classname == "order": + order = self.read_object("order", obj={"id": container._id})[0] + order_id = order._id + if "project" in order: + project_id = order.project._id + else: + project_id = None + if "fastasequence" in order: + fastasequence = "\n".join([x.strip() for x in str(order.fastasequence).split("\r")]) + else: + order_id = None + project_id = container._id + + today = datetime.date.today() + + # merge all information into the executable script + _output_storage = self.read_object("storage", obj={"id": application.storage._id})[0] + + _output_relative_path = "p{0}/bfabric/{1}/{2}/{3}/workunit_{4}/".format( + container._id, + application.technology.replace(" ", "_"), + application.name.replace(" ", "_"), + today.strftime("%Y/%Y-%m/%Y-%m-%d/"), + workunitid, + ) + + # Setup the log_storage to SlurmLog with id 13 + _log_storage = self.read_object("storage", obj={"id": 13})[0] + + # _cmd_applicationList = [workunit_executable.program] + + application_parameter = {} + + if not getattr(workunit, "parameter", None) is None: + for para in workunit.parameter: + parameter = self.read_object("parameter", obj={"id": para._id}) + if parameter: + for p in parameter: + try: + application_parameter["{}".format(p.key)] = "{}".format(p.value) + except: + application_parameter["{}".format(p.key)] = "" + + try: + input_resources = [x._id for x in workunit.inputresource] + input_resources = [self.read_object(endpoint="resource", obj={"id": x})[0] for x in input_resources] + except: + print("no input resources found. 
continue with empty list.") + input_resources = [] + + # query all urls and ids of the input resources + resource_urls = dict() + resource_ids = dict() + + for resource_iterator in input_resources: + try: + _appication_id = self.read_object(endpoint="workunit", obj={"id": resource_iterator.workunit._id})[ + 0 + ].application._id + + _application_name = "{0}".format(self.read_object("application", obj={"id": _appication_id})[0].name) + + _storage = self.read_object("storage", {"id": resource_iterator.storage._id})[0] + + _inputUrl = "bfabric@{0}:/{1}/{2}".format( + _storage.host, _storage.basepath, resource_iterator.relativepath + ) + + if not _application_name in resource_urls: + resource_urls[_application_name] = [] + resource_ids[_application_name] = [] + + resource_urls[_application_name].append(_inputUrl) + + sample_id = self.get_sampleid(int(resource_iterator._id)) + + _resource_sample = { + "resource_id": int(resource_iterator._id), + "resource_url": "{0}/userlab/show-resource.html?id={1}".format( + self.config.base_url, resource_iterator._id + ), + } + + if not sample_id is None: + _resource_sample["sample_id"] = int(sample_id) + _resource_sample["sample_url"] = "{0}/userlab/show-sample.html?id={1}".format( + self.config.base_url, sample_id + ) + + resource_ids[_application_name].append(_resource_sample) + except: + print("resource_iterator failed. continue ...") + pass + + # create resources for output, stderr, stdout + _ressource_output = self.save_object( + "resource", + { + "name": "{0} {1} - resource".format(application.name, len(input_resources)), + "workunitid": workunit._id, + "storageid": int(application.storage._id), + "relativepath": _output_relative_path, + }, + )[0] + + print(_ressource_output) + _output_filename = "{0}.{1}".format(_ressource_output._id, application.outputfileformat) + # we want to include the resource._id into the filename + _ressource_output = self.save_object( + "resource", + { + "id": int(_ressource_output._id), + "relativepath": "{0}/{1}".format(_output_relative_path, _output_filename), + }, + )[0] + + print(_ressource_output) + _resource_stderr = self.save_object( + "resource", + { + "name": "slurm_stderr", + "workunitid": int(workunit._id), + "storageid": _log_storage._id, + "relativepath": "/workunitid-{0}_resourceid-{1}.err".format(workunit._id, _ressource_output._id), + }, + )[0] + + _resource_stdout = self.save_object( + "resource", + { + "name": "slurm_stdout", + "workunitid": workunit._id, + "storageid": _log_storage._id, + "relativepath": "/workunitid-{0}_resourceid-{1}.out".format(workunit._id, _ressource_output._id), + }, + )[0] + + # Creates the workunit executable + # The config includes the externaljobid: the yaml_workunit_externaljob has to be created before it. + # The yaml_workunit_externaljob cannot be created without specifying an executableid: + # a yaml_workunit_executable is thus created before the config definition in order to provide + # the correct executableid to the yaml_workunit_externaljob. + # However this yaml_workunit_executable has to be updated later to include 'base64': base64.b64encode(config_serialized.encode()).decode() + yaml_workunit_executable = self.save_object( + "executable", + { + "name": "job configuration (executable) in YAML", + "context": "WORKUNIT", + "workunitid": workunit._id, + "description": "This is a job configuration as YAML base64 encoded. 
+                "description": "This is a job configuration as YAML base64 encoded. "
+                "It is configured to be executed by the B-Fabric yaml submitter.",
+            },
+        )[0]
+        print(yaml_workunit_executable)
+
+        yaml_workunit_externaljob = self.save_object(
+            "externaljob",
+            {
+                "workunitid": workunit._id,
+                "status": "new",
+                "executableid": yaml_workunit_executable._id,
+                "action": "WORKUNIT",
+            },
+        )[0]
+        print(yaml_workunit_externaljob)
+        assert isinstance(yaml_workunit_externaljob._id, int)
+        self.externaljobid_yaml_workunit = int(yaml_workunit_externaljob._id)
+        print("XXXXXXX self.externaljobid_yaml_workunit = {} XXXXXXX".format(self.externaljobid_yaml_workunit))
+
+        _output_url = "bfabric@{0}:{1}{2}/{3}".format(
+            _output_storage.host, _output_storage.basepath, _output_relative_path, _output_filename
+        )
+
+        try:
+            query_obj = {"id": workunit.inputdataset._id}
+            inputdataset = self.read_object(endpoint="dataset", obj=query_obj)[0]
+            inputdataset_json = json.dumps(inputdataset, cls=bfabricEncoder, sort_keys=True, indent=2)
+            inputdataset = json.loads(inputdataset_json)
+        except AttributeError:
+            inputdataset = None
+
+        # Compose the configuration structure
+        config = {
+            "job_configuration": {
+                "executable": str(workunit_executable.program),
+                "inputdataset": inputdataset,
+                "input": resource_ids,
+                "output": {
+                    "protocol": "scp",
+                    "resource_id": int(_resource_output._id),
+                    "ssh_args": "-o StrictHostKeyChecking=no -2 -l bfabric -x",
+                },
+                "stderr": {
+                    "protocol": "file",
+                    "resource_id": int(_resource_stderr._id),
+                    "url": "{0}/workunitid-{1}_resourceid-{2}.err".format(
+                        _log_storage.basepath, workunit._id, _resource_output._id
+                    ),
+                },
+                "stdout": {
+                    "protocol": "file",
+                    "resource_id": int(_resource_stdout._id),
+                    "url": "{0}/workunitid-{1}_resourceid-{2}.out".format(
+                        _log_storage.basepath, workunit._id, _resource_output._id
+                    ),
+                },
+                "workunit_id": int(workunit._id),
+                "workunit_createdby": str(workunit.createdby),
+                "workunit_url": "{0}/userlab/show-workunit.html?workunitId={1}".format(
+                    self.config.base_url, workunit._id
+                ),
+                "external_job_id": int(yaml_workunit_externaljob._id),
+                "order_id": order_id,
+                "project_id": project_id,
+                "fastasequence": fastasequence,
+            },
+            "application": {
+                "protocol": "scp",
+                "parameters": application_parameter,
+                "input": resource_urls,
+                "output": [_output_url],
+            },
+        }
+
+        config_serialized = data_serializer(config)
+        print(config_serialized)
+
+        yaml_workunit_executable = self.save_object(
+            "executable",
+            {
+                "id": yaml_workunit_executable._id,
+                "base64": base64.b64encode(config_serialized.encode()).decode(),
+                "version": "10",
+            },
+        )[0]
+        print(yaml_workunit_executable)
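+
+        # At this point the executable's base64 field carries the complete job
+        # configuration composed above ({"job_configuration": ..., "application": ...}),
+        # serialized to YAML and base64-encoded, ready for the yaml submitter to decode.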
+
+        # The WrapperCreator executable is successful, and the status of its external job is set to done,
+        # which triggers B-Fabric to create an external job for the submitter executable.
+        wrapper_creator_externaljob = self.save_object(
+            endpoint="externaljob", obj={"id": self.externaljobid, "status": "done"}
+        )
+
+        print("\n\nquery_counter={0}".format(self.query_counter))
diff --git a/demo_config.yaml b/bfabric/wrapper_creator/demo_config.yaml
similarity index 100%
rename from demo_config.yaml
rename to bfabric/wrapper_creator/demo_config.yaml
diff --git a/bfabric/gridengine.py b/bfabric/wrapper_creator/gridengine.py
similarity index 73%
rename from bfabric/gridengine.py
rename to bfabric/wrapper_creator/gridengine.py
index ad6937bb..cd0e4fa5 100755
--- a/bfabric/gridengine.py
+++ b/bfabric/wrapper_creator/gridengine.py
@@ -37,20 +37,20 @@
 # $Date: 2016-09-23 16:55:50 +0200 (Fri, 23 Sep 2016) $
 # $Author: cpanse $
 
-__docformat__ = 'reStructuredText'
-__version__ = '$Revision: 2463 $'
-
+__docformat__ = "reStructuredText"
+__version__ = "$Revision: 2463 $"
 
 import os
 import subprocess
 
+
 class GridEngine(object):
     """
-    interface to Open Grid Sceduler qsub
+    interface to Open Grid Scheduler qsub
     """
 
-    def __init__(self, user='*', queue="PRX@fgcz-r-035", GRIDENGINEROOT='/export/bfabric/bfabric/'):
+    def __init__(self, user="*", queue="PRX@fgcz-r-035", GRIDENGINEROOT="/export/bfabric/bfabric/"):
         """
         Set up parameters for querying Grid Engine.
@@ -65,41 +65,37 @@ def __init__(self, user='*', queue="PRX@fgcz-r-035", GRIDENGINEROOT='/export/bfa
 
     def qsub(self, script, arguments=""):
         """
-        if qsub and script are files do
-        qsub as fire and forget
+        if qsub and script are files, do
+        qsub as fire and forget
 
-        todo: pass stderr and stdout file location as argument
+        todo: pass stderr and stdout file location as argument
         """
         qsub_cmd = [self.qsubbin, "-q", self.queue, script, " ".join(arguments)]
 
         if not os.path.isfile(self.qsubbin):
-            print ("{0} can not be found.".format(self.qsubbin))
+            print("{0} cannot be found.".format(self.qsubbin))
             return
 
         if not os.path.isfile(script):
-            print ("'{0}' - no such file.".format(script))
+            print("'{0}' - no such file.".format(script))
             return
 
         try:
-            qsub_process = subprocess.Popen(
-                qsub_cmd,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                shell=False)
+            qsub_process = subprocess.Popen(qsub_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False)
 
             stdout, stderr = qsub_process.communicate()
 
             return stdout
         # except subprocess.CalledProcessError, ex:
         except:
-            #logging.error("Error running '%s': '%s'; exit code %d", str.join(' ', qstat_cmd), stderr, ex.returncode)
+            # logging.error("Error running '%s': '%s'; exit code %d", str.join(' ', qstat_cmd), stderr, ex.returncode)
             raise
 
 
 def main():
-    print ("hello world!")
+    print("hello world!")
     pass
 
-if __name__ == "__main__":
+
+if __name__ == "__main__":
     main()
diff --git a/bfabric/slurm.py b/bfabric/wrapper_creator/slurm.py
similarity index 73%
rename from bfabric/slurm.py
rename to bfabric/wrapper_creator/slurm.py
index cf45eeb3..0eda4b8f 100755
--- a/bfabric/slurm.py
+++ b/bfabric/wrapper_creator/slurm.py
@@ -30,20 +30,20 @@
 # limitations under the License.
 #
 
-__docformat__ = 'reStructuredText'
-#__version__ = '$Revision: 2463 $'
-
+__docformat__ = "reStructuredText"
+# __version__ = '$Revision: 2463 $'
 
 import os
 import subprocess
 
+
 class SLURM(object):
     """
-    interface to Slurm sbatch
+    interface to Slurm sbatch
     """
 
-    def __init__(self, user='*', SLURMROOT='/usr/'):
+    def __init__(self, user="*", SLURMROOT="/usr/"):
         """
         Set up parameters for querying Slurm.
@@ -57,26 +57,19 @@ def __init__(self, user='*', SLURMROOT='/usr/'):
 
     def sbatch(self, script, arguments=""):
         """
-        todo: pass stderr and stdout file location as argument
+        todo: pass stderr and stdout file location as argument
         """
         sbatch_cmd = [self.sbatchbin, script, " ".join(arguments)]
 
         if not os.path.isfile(self.sbatchbin):
-            print ("{0} can not be found.".format(self.sbatchbin))
+            print("{0} cannot be found.".format(self.sbatchbin))
             return
 
         if not os.path.isfile(script):
-            print ("'{0}' - no such file.".format(script))
+            print("'{0}' - no such file.".format(script))
             return
 
-        sbatch_process = subprocess.Popen(
-            sbatch_cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            shell=False)
-        result = [x.decode('utf-8') for x in sbatch_process.communicate()]
-
-        return ''.join(result)
-
-
+        sbatch_process = subprocess.Popen(sbatch_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False)
+        result = [x.decode("utf-8") for x in sbatch_process.communicate()]
+        return "".join(result)
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 00000000..171ac06f
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,40 @@
+# Changelog
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
+
+Versioning currently follows `X.Y.Z` where
+
+- `X` is used for major changes that contain breaking changes
+- `Y` should be the current bfabric release
+- `Z` is increased for feature releases that should not break the API
+
+## [1.13.0] - 2024-05-24
+This is a major release refactoring bfabricPy's API.
+
+### Changed
+- The `Bfabric` class operations now return `ResultContainer` objects.
+  - These provide a list-like interface to access individual items or iterate over them.
+  - Individual items are a dictionary, potentially nested, and no longer specific to suds/zeep.
+  - Convenience conversions, e.g. to a polars DataFrame, can be provided there.
+- Configuration is now defined in `~/.bfabricpy.yml` and supports multiple configurations, which can be selected with the `BFABRICPY_CONFIG_ENV` environment variable. Please consult the README for an example configuration.
+- Use `pyproject.toml` for package configuration.
+- Scripts have been refactored on a case-by-case basis.
+
+### Added
+- Zeep can be used instead of suds for SOAP communication.
+- `Bfabric` can be instantiated without authentication, which can be provided later. This is useful in a server setup.
+
+### Removed
+- Several old scripts have been moved into a `deprecated_scripts` folder.
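+
+### Example
+A minimal sketch of the new `ResultContainer` usage (illustrative; the `from_config` and `read` names are assumptions, consult the README for the exact API):
+
+```python
+from bfabric import Bfabric
+
+client = Bfabric.from_config()  # reads ~/.bfabricpy.yml
+results = client.read(endpoint="workunit", obj={"createdby": "pfeeder"})
+for result in results:
+    # each item behaves like a plain, possibly nested, dictionary
+    print(result["id"])
+```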
diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..314f24cf --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,85 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +include = ["bfabric*"] + +[project] +name = "bfabric" +description = "Python client for the B-Fabric WSDL API" +version = "1.13.0" +license = { text = "GPL-3.0" } +authors = [ + {name = "Christian Panse", email = "cp@fgcz.ethz.ch"}, + {name = "Leonardo Schwarz", email = "leonardo.schwarz@fgcz.ethz.ch"}, + {name = "Aleksejs Fomins"}, + {name = "Marco Schmidt"}, + {name = "Maria d'Errico"}, + {name = "Witold Eryk Wolski"} +] +requires-python = ">=3.9" +dependencies = [ + "suds >= 1.1.2", + "PyYAML >= 6.0", + "Flask >= 3.0.3", + "rich >= 13.7.1", + "zeep >= 4.2.1", + "pandas >= 2.2.2", + "polars-lts-cpu >= 0.20.25", + "setuptools" +] + +[project.optional-dependencies] +dev = [ + "black", + "isort", + "ruff", + "licensecheck" +] + +[project.urls] +Homepage = "https://github.com/fgcz/bfabricPy" +Repository = "https://github.com/fgcz/bfabricPy" + +[project.scripts] +"bfabric_flask.py"="bfabric.scripts.bfabric_flask:main" +#bfabric_feeder_resource_autoQC="bfabric.scripts.bfabric_feeder_resource_autoQC:main" +"bfabric_list_not_existing_storage_directories.py"="bfabric.scripts.bfabric_list_not_existing_storage_directories:main" +"bfabric_list_not_available_proteomics_workunits.py"="bfabric.scripts.bfabric_list_not_available_proteomics_workunits:main" +"bfabric_list_workunit_parameters.py"="bfabric.scripts.bfabric_list_workunit_parameters:main" +"bfabric_upload_resource.py"="bfabric.scripts.bfabric_upload_resource:main" +"bfabric_logthis.py"="bfabric.scripts.bfabric_logthis:main" +"bfabric_setResourceStatus_available.py"="bfabric.scripts.bfabric_setResourceStatus_available:main" +"bfabric_setExternalJobStatus_done.py"="bfabric.scripts.bfabric_setExternalJobStatus_done:main" +"bfabric_setWorkunitStatus_available.py"="bfabric.scripts.bfabric_setWorkunitStatus_generic:main_available" +"bfabric_setWorkunitStatus_processing.py"="bfabric.scripts.bfabric_setWorkunitStatus_processing:main_processing" +"bfabric_setWorkunitStatus_failed.py"="bfabric.scripts.bfabric_setWorkunitStatus_failed:main_failed" +"bfabric_delete.py"="bfabric.scripts.bfabric_delete:main" +"bfabric_read.py"="bfabric.scripts.bfabric_read:main" +"bfabric_read_samples_of_workunit.py"="bfabric.scripts.bfabric_read_samples_of_workunit:main" +"bfabric_read_samples_from_dataset.py"="bfabric.scripts.bfabric_read_samples_from_dataset:main" +"bfabric_save_csv2dataset.py"="bfabric.scripts.bfabric_save_csv2dataset:main" +"bfabric_save_dataset2csv.py"="bfabric.scripts.bfabric_save_dataset2csv:main" +"bfabric_save_fasta.py"="bfabric.scripts.bfabric_save_fasta:main" +"bfabric_save_importresource_sample.py"="bfabric.scripts.bfabric_save_importresource_sample:main" +"bfabric_save_link_to_workunit.py"="bfabric.scripts.bfabric_save_link_to_workunit:main" +#bfabric_save_resource="bfabric.scripts.bfabric_save_resource:main" +"bfabric_save_workunit_attribute.py"="bfabric.scripts.bfabric_save_workunit_attribute:main" +"bfabric_save_workflowstep.py"="bfabric.scripts.bfabric_save_workflowstep:main" + +[tool.black] +line-length = 120 +target-version = ["py39"] + +[tool.ruff] +line-length = 120 +indent-width = 4 +target-version = "py39" + +[tool.ruff.lint] +select = ["ANN", "BLE", "D103", "E", "F", "PLW", "PTH", "SIM", "UP", "TCH"] +ignore = ["ANN101", "ANN102"] + +[tool.licensecheck] +using = 
"PEP631" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8e4d062a..00000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -Flask==2.2.5 -PyYAML>=3.11 -suds-py3>=1.4.1 -slugify diff --git a/setup.py b/setup.py deleted file mode 100755 index 5698ba2b..00000000 --- a/setup.py +++ /dev/null @@ -1,68 +0,0 @@ -""" - -B-Fabric Appliaction Interface using WSDL - -The code contains classes for wrapper_creator and submitter. - -Ensure that this file is available on the bfabric exec host. - -Copyright (C) 2014-2024 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved. - -Authors: - Christian Panse - Maria d'Errico - -Licensed under GPL version 3 - -""" - -from setuptools import setup, find_packages -import os - -with open('requirements.txt') as f: - INSTALL_REQUIRES = f.read().splitlines() -ver_file = os.path.join('bfabric', '_version.py') -with open(ver_file) as f: - exec(f.read()) - -VERSION = __version__ - -setup(name = 'bfabric', - version = VERSION, - description = """ -B-Fabric Appliaction Interface using WSDL. The code contains classes for wrapper_creator and submitter. -""", - url = 'git@github.com:fgcz/bfabricPy.git ', - author = 'Christian Panse', - author_email = 'cp@fgcz.ethz.ch', - license = 'GPLv3 / apache 2.0', - packages = ['bfabric'], - python_requires = ">=3.9", - install_requires = INSTALL_REQUIRES, - scripts = [ - 'bfabric/scripts/bfabric_flask.py', - 'bfabric/scripts/bfabric_feeder_resource_autoQC.py', - 'bfabric/scripts/bfabric_list_not_existing_storage_directories.py', - 'bfabric/scripts/bfabric_list_not_available_proteomics_workunits.py', - 'bfabric/scripts/bfabric_upload_resource.py', - 'bfabric/scripts/bfabric_logthis.py', - 'bfabric/scripts/bfabric_setResourceStatus_available.py', - 'bfabric/scripts/bfabric_setExternalJobStatus_done.py', - 'bfabric/scripts/bfabric_setWorkunitStatus_available.py', - 'bfabric/scripts/bfabric_setWorkunitStatus_processing.py', - 'bfabric/scripts/bfabric_setWorkunitStatus_failed.py', - 'bfabric/scripts/bfabric_delete.py', - 'bfabric/scripts/bfabric_read.py', - 'bfabric/scripts/bfabric_read_samples_of_workunit.py', - 'bfabric/scripts/bfabric_read_samples_from_dataset.py', - 'bfabric/scripts/bfabric_save_csv2dataset.py', - 'bfabric/scripts/bfabric_save_dataset2csv.py', - 'bfabric/scripts/bfabric_save_fasta.py', - 'bfabric/scripts/bfabric_save_importresource_sample.py', - 'bfabric/scripts/bfabric_save_link_to_workunit.py', - 'bfabric/scripts/bfabric_save_resource.py', - 'bfabric/scripts/bfabric_save_workunit_attribute.py', - 'bfabric/scripts/bfabric_save_workflowstep.py' - ], - zip_safe=True) -