From 65da904215f26378f2e59e2c4b20c7ee743f4c0c Mon Sep 17 00:00:00 2001 From: Kola Date: Fri, 14 Feb 2025 15:42:25 +0000 Subject: [PATCH] Remove bia-converter-light directory --- bia-converter-light/.env_template | 9 - bia-converter-light/README.md | 82 --- .../bia_converter_light/__init__.py | 0 .../bia_converter_light/cli.py | 315 ----------- .../bia_converter_light/config.py | 60 --- .../bia_converter_light/conversion.py | 219 -------- bia-converter-light/bia_converter_light/io.py | 119 ----- .../bia_converter_light/omezarrmeta.py | 86 ---- .../bia_converter_light/propose_utils.py | 160 ------ .../bia_converter_light/rendering.py | 487 ------------------ ...bioformats_curated_file_formats_readme.txt | 14 - .../bioformats_curated_other_file_formats.txt | 66 --- ...bioformats_curated_single_file_formats.txt | 120 ----- .../bia_converter_light/utils.py | 231 --------- .../file_references_to_convert.tsv | 16 - bia-converter-light/pyproject.toml | 40 -- .../print_details_of_convertible_images.py | 70 --- .../test_files_for_study_component_2/im06.png | Bin 24649 -> 0 bytes .../study_component1/im06.ome.zarr/.zattrs | 3 - .../study_component1/im06.ome.zarr/.zgroup | 3 - .../study_component1/im06.ome.zarr/0/.zattrs | 80 --- .../study_component1/im06.ome.zarr/0/.zgroup | 3 - .../im06.ome.zarr/0/0/.zarray | 17 - .../im06.ome.zarr/0/0/0/0/0/0/0 | Bin 66 -> 0 bytes .../im06.ome.zarr/0/0/0/1/0/0/0 | Bin 66 -> 0 bytes .../im06.ome.zarr/0/0/0/2/0/0/0 | Bin 66 -> 0 bytes .../im06.ome.zarr/OME/.zattrs | 3 - .../im06.ome.zarr/OME/.zgroup | 3 - .../im06.ome.zarr/OME/METADATA.ome.xml | 1 - bia-converter-light/test/test_cli.py | 340 ------------ .../test/test_conversion_util_funcs.py | 14 - .../test/test_create_thumbnails.py | 16 - 32 files changed, 2577 deletions(-) delete mode 100644 bia-converter-light/.env_template delete mode 100644 bia-converter-light/README.md delete mode 100644 bia-converter-light/bia_converter_light/__init__.py delete mode 100644 bia-converter-light/bia_converter_light/cli.py delete mode 100644 bia-converter-light/bia_converter_light/config.py delete mode 100644 bia-converter-light/bia_converter_light/conversion.py delete mode 100644 bia-converter-light/bia_converter_light/io.py delete mode 100644 bia-converter-light/bia_converter_light/omezarrmeta.py delete mode 100644 bia-converter-light/bia_converter_light/propose_utils.py delete mode 100644 bia-converter-light/bia_converter_light/rendering.py delete mode 100644 bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt delete mode 100644 bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt delete mode 100644 bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt delete mode 100644 bia-converter-light/bia_converter_light/utils.py delete mode 100644 bia-converter-light/file_references_to_convert.tsv delete mode 100644 bia-converter-light/pyproject.toml delete mode 100644 bia-converter-light/scripts/print_details_of_convertible_images.py delete mode 100644 bia-converter-light/test/data/test_files_for_study_component_2/im06.png delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs delete mode 100644 
bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/0/0/0/0 delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/1/0/0/0 delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/2/0/0/0 delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup delete mode 100644 bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml delete mode 100644 bia-converter-light/test/test_cli.py delete mode 100644 bia-converter-light/test/test_conversion_util_funcs.py delete mode 100644 bia-converter-light/test/test_create_thumbnails.py diff --git a/bia-converter-light/.env_template b/bia-converter-light/.env_template deleted file mode 100644 index f74f43c8..00000000 --- a/bia-converter-light/.env_template +++ /dev/null @@ -1,9 +0,0 @@ -bia_data_dir=Root of directory to save/read models from disk if using persistence_mode=disk -cache_root_dirpath=PATH_TO_CACHE_FILES_DOWNLOADED_AND_CONVERTED -bia_api_basepath=CREDENTIALS_FOR_LOCALHOST -bia_api_username=CREDENTIALS_FOR_LOCALHOST -bia_api_password=CREDENTIALS_FOR_LOCALHOST -endpoint_url=EMBASSY_S3_ENDPOINT_URL_LEAVE_THIS_DUMMY_FOR_NO_S3 -bucket_name=BUCKET_NAME_OF_S3_BUCKET -bioformats2raw_java_home=DIDNT_NEED_TO_CONFIGURE_THIS -bioformats2raw_bin=PATH_TO_BIOFORMATS_TO_RAW_BIN_DIR diff --git a/bia-converter-light/README.md b/bia-converter-light/README.md deleted file mode 100644 index a379ee1d..00000000 --- a/bia-converter-light/README.md +++ /dev/null @@ -1,82 +0,0 @@ -## Description -This sub-package creates image representations *and* the actual images associated with those representations. It is named *bia-converter-light* because it only converts one file reference per image representation, whereas the upcoming *bia-converter* sub-package will be able to handle more complex conversions, including creation of multichannel and multi-slice images from multiple file references per image representation. - -## Setup - -1. Install the project using poetry. -2. Configure your environment. Either create a .env file from .env_template in this directory or set environment variables for the items in .env_template. - * For getting objects from the API set: - - bia_api_basepath - - bia_api_username - - bia_api_password - * For caching downloaded/converted images locally, the default location is `~/.cache/bia-converter/`, which can be changed by setting `cache_root_dirpath` - * For conversion to zarr format, [bioformats2raw](https://github.com/glencoesoftware/bioformats2raw) is used. Set: - - bioformats2raw_java_home - - bioformats2raw_bin - * For upload to S3 set: - - endpoint_url - - bucket_name - -The AWS credentials for the endpoint also need to be set. This is done exclusively via environment variables. Either: -* AWS_ACCESS_KEY_ID *and* AWS_SECRET_ACCESS_KEY -
OR -* AWS_SHARED_CREDENTIALS_FILE with optional AWS_PROFILE and/or AWS_CONFIG_FILE - -## Usage -This package has 3 CLI applications: - * **propose**: used to create a tsv file with details of file references that can be converted to images. - * **convert-image**: used to create the actual images associated with the representations (the necessary BIA Image object and associated UPLOADED_BY_SUBMITTER representation are created if they do not exist). - * **update-example-image-uri-for-dataset**: used to update the example image uri for a dataset. - -Subsequent instructions assume the project is installed, the environment is configured, and this directory is the working directory. - -## Creating details of file references to convert -To create a tsv file with details of file references to convert for one or more studies, run: -```sh -$ poetry run bia-converter-light propose --accession-ids-path <path_to_file_with_accession_ids> -``` -or to specify accession ids on the command line: -```sh -$ poetry run bia-converter-light propose -a <accession_id_1> -a <accession_id_2> -``` -E.g.: -```sh -$ poetry run bia-converter-light propose -a S-BIAD1444 -a S-BIAD1266 -``` -By default this writes output to `./file_references_to_convert.tsv`, which can be changed with the `--output-path` option. - - -## Converting images associated with representations -The input is a file containing details of file references for conversion, in the format produced by the `propose` command above. Additionally, if conversion is required for only a subset of the accession ids in the file, these can be specified on the command line. INTERACTIVE_DISPLAY and THUMBNAIL representations are created for all file references, and a STATIC_DISPLAY is created for the first file reference processed for each study. - -A STATIC_DISPLAY representation is not created for every file reference because the BIA website only needs one static display per experimental imaging dataset. All interactive images need a thumbnail for the website, so THUMBNAIL representations are always created alongside the interactive ones. - -The convention followed for conversion is: -1. Ensure a BIA Image object and UPLOADED_BY_SUBMITTER representation exist - the `bia-assign-image` subpackage is called to create these if they do not exist -2. Create an INTERACTIVE_DISPLAY representation -3. Create a THUMBNAIL representation - an INTERACTIVE_DISPLAY representation is a prerequisite -4. Create a STATIC_DISPLAY representation if necessary - an INTERACTIVE_DISPLAY representation is a prerequisite - -Example CLI use: -```sh -$ poetry run bia-converter-light convert-image --conversion-details-path <path_to_conversion_details_tsv> -``` - -## Updating example image uri for dataset -```sh -$ poetry run bia-converter-light update-example-image-uri-for-dataset <representation_uuid> -``` - - - -## convert-image dependencies - -bioformats2raw: see [this](https://github.com/glencoesoftware/bioformats2raw) - -As a prerequisite to installing bioformats2raw (which is documented in the link above), you need to install blosc for image file compression. -On Ubuntu (at least): `sudo apt-get install libblosc-dev` -On macOS: `brew install c-blosc` - -aws cli: see [this](https://aws.amazon.com/cli/) - -Note: an issue seen on macOS (fsherwood): bioformats2raw may not be able to find blosc and fails with an error along the lines of 'Exception java.lang.UnsatisfiedLinkError: Unable to load library 'blosc'', even after adding the library location to `-Djna.library.path`. To solve this, create a symlink to the library in a location that is searched, e.g. in `/opt/homebrew/Cellar/openjdk/23/libexec/openjdk.jdk/Contents/Home/bin` run `ln -s /opt/homebrew/Cellar/c-blosc/1.21.6/lib/libblosc.dylib libblosc.dylib`. 
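For orientation, the three commands above are typically run in sequence for a study. The sketch below strings them together; it assumes the example accession ID used earlier, default file locations, and a placeholder representation UUID that you would copy from the `convert-image` output or look up via the API.

```sh
# 1. Propose convertible file references for one study (writes a tsv, appending by default)
$ poetry run bia-converter-light propose -a S-BIAD1444 --output-path ./file_references_to_convert.tsv

# 2. Convert the proposed file references: creates INTERACTIVE_DISPLAY and THUMBNAIL
#    representations for each entry, plus a STATIC_DISPLAY for the first entry per study
$ poetry run bia-converter-light convert-image --conversion-details-path ./file_references_to_convert.tsv

# 3. Set the dataset's example image from a STATIC_DISPLAY representation
#    (the UUID below is a placeholder)
$ poetry run bia-converter-light update-example-image-uri-for-dataset 00000000-0000-0000-0000-000000000000
```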
diff --git a/bia-converter-light/bia_converter_light/__init__.py b/bia-converter-light/bia_converter_light/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/bia-converter-light/bia_converter_light/cli.py b/bia-converter-light/bia_converter_light/cli.py deleted file mode 100644 index 5bbf4445..00000000 --- a/bia-converter-light/bia_converter_light/cli.py +++ /dev/null @@ -1,315 +0,0 @@ -from typing import List, Union -import csv -from pathlib import Path -from uuid import UUID -import typer -from typing_extensions import Annotated - -from bia_shared_datamodels.semantic_models import ImageRepresentationUseType -from bia_integrator_api.exceptions import NotFoundException -from bia_integrator_api import PrivateApi -from bia_assign_image.cli import assign as assign_image -from bia_converter_light.config import api_client -from bia_converter_light.utils import save_to_api -from bia_converter_light.conversion import ( - convert_to_zarr, - convert_to_png, -) - -from bia_converter_light.propose_utils import ( - write_convertible_file_references_for_accession_id, -) - -import logging -from rich.logging import RichHandler - -from bia_shared_datamodels import uuid_creation - -app = typer.Typer() - - -logging.basicConfig( - level=logging.INFO, format="%(message)s", handlers=[RichHandler(show_time=False)] -) - -# Set default page size for API queries -DEFAULT_PAGE_SIZE = 10000 - -logger = logging.getLogger() - -representations_app = typer.Typer() -app.add_typer( - representations_app, - name="representations", - help="Create specified representations", -) - - -def validate_propose_inputs( - accession_ids: list[str] = None, accession_ids_path: Path = None -) -> None: - """Validate that only one of accession_ids or file_path is provided.""" - if accession_ids and accession_ids_path: - typer.echo( - "Error: Provide either a list of accession IDs or a file path, not both.", - err=True, - ) - raise typer.Exit(code=1) - if not accession_ids and not accession_ids_path: - typer.echo( - "Error: You must provide either a list of accession IDs or a file path.", - err=True, - ) - raise typer.Exit(code=1) - - -def ensure_assigned( - accession_id: str, image_uuid: str, file_reference_uuid: str -) -> None: - """Ensure Image and corresponding UPLOADED_BY_USER representation exist""" - try: - api_client.get_image(image_uuid) - except NotFoundException: - logger.warning( - f"Could not find Image with uuid {image_uuid}. Attempting creation" - ) - assign_image( - accession_id, - [ - file_reference_uuid, - ], - "api", - ) - - -def get_conversion_details(conversion_details_path: Path) -> List[dict]: - with conversion_details_path.open("r") as fid: - field_names = [ - "accession_id", - "study_uuid", - "file_path", - "file_reference_uuid", - "size_in_bytes", - "size_human_readable", - ] - reader = csv.DictReader(fid, fieldnames=field_names, delimiter="\t") - # Some files may not have header, so check first row - first_row = next(reader) - conversion_details = [row for row in reader] - if first_row.get("accession_id") != "accession_id": - conversion_details.insert(0, first_row) - - return conversion_details - - -def convert_file_reference_to_image_representation( - accession_id: str, - file_reference_uuid: str, - use_type: ImageRepresentationUseType, - verbose: bool = False, -) -> None: - """Convert file ref to image rep of use type. Upload to s3 - - Create the actual image for the image representation and stage to S3, - and persist the image representation in the API. 
- - This function is only temporary while image conversion via the API is being developed - """ - - if verbose: - logger.setLevel(logging.DEBUG) - - assert isinstance( - api_client, PrivateApi - ), f"Expected a valid instance of PrivateApi. Got: {type(api_client)} - are your API credentials valid and/or is the API server online?" - - bia_images = api_client.get_image_linking_file_reference( - file_reference_uuid, page_size=DEFAULT_PAGE_SIZE - ) - n_bia_images = len(bia_images) - assert ( - n_bia_images < 2 - ), f"Expected at most one image to be associated with file reference uuid {file_reference_uuid}. Got {n_bia_images}: {bia_images}." - if n_bia_images == 1: - bia_image = bia_images[0] - image_uuid = f"{bia_image.uuid}" - ensure_assigned(accession_id, image_uuid, file_reference_uuid) - else: - image_uuid = uuid_creation.create_image_uuid( - [ - file_reference_uuid, - ] - ) - image_uuid = str(image_uuid) - ensure_assigned(accession_id, image_uuid, file_reference_uuid) - bia_image = api_client.get_image(image_uuid) - file_reference = api_client.get_file_reference(file_reference_uuid) - - if use_type == ImageRepresentationUseType.INTERACTIVE_DISPLAY: - return convert_to_zarr(accession_id, file_reference, bia_image) - elif use_type in ( - ImageRepresentationUseType.THUMBNAIL, - ImageRepresentationUseType.STATIC_DISPLAY, - ): - return convert_to_png(accession_id, file_reference, bia_image, use_type) - else: - logger.warning( - f"Cannot create/convert images for image representation of type: {use_type.value} - exiting" - ) - return - - -def update_example_image_uri( - representation_uuid: Union[UUID, str], - verbose: bool = False, -) -> bool: - try: - representation = api_client.get_image_representation(representation_uuid) - except Exception as e: - logger.error(f"Could not retrieve image representation. 
Error was {e}.") - return False - if representation.use_type == ImageRepresentationUseType.STATIC_DISPLAY: - image = api_client.get_image(representation.representation_of_uuid) - dataset = api_client.get_dataset(image.submission_dataset_uuid) - dataset.example_image_uri.append(representation.file_uri[0]) - save_to_api( - [ - dataset, - ] - ) - - logger.info( - f"Updated example image uri of dataset {dataset.uuid} to {dataset.example_image_uri}" - ) - return True - else: - logger.warning( - f"Cannot update dataset example image uri when image representation use type is {representation.use_type.value}" - ) - return False - - -@app.command() -def update_example_image_uri_for_dataset( - representation_uuid: Annotated[ - str, - typer.Argument(help="UUID for a STATIC_DISPLAY representation of the dataset"), - ], - # TODO: Have a 'mode' option to allow replace, prepend or append - verbose: Annotated[bool, typer.Option("-v")] = False, -): - update_example_image_uri(representation_uuid, verbose) - - -@app.command() -def convert_image( - accession_ids: Annotated[ - List[str], typer.Option("--accession-ids", "-a", help="Accession ID(s).") - ] = ["all"], - conversion_details_path: Annotated[ - Path, - typer.Option( - "--conversion-details-path", - "-c", - exists=True, - help="Path to tsv file containing details needed for conversion (produced by 'propose' command).", - ), - ] = None, -): - """Convert file references to image representations""" - # The convention is to create - # i) INTERACTIVE_DISPLAY - # ii) THUMBNAIL - # iii) If first image for accession ID STATIC_DISPLAY - conversion_details = get_conversion_details(conversion_details_path) - if accession_ids == ["all"]: - set_accession_ids = {cd["accession_id"] for cd in conversion_details} - accession_ids = list(set_accession_ids) - accession_ids.sort() - else: - # Filter conversion details for accession IDs to process - conversion_details_temp = [ - cd for cd in conversion_details if cd["accession_id"] in accession_ids - ] - conversion_details = conversion_details_temp - - accession_ids_with_static_display = set() - for conversion_detail in conversion_details: - accession_id = conversion_detail["accession_id"] - file_reference_uuid = conversion_detail["file_reference_uuid"] - for use_type in ( - ImageRepresentationUseType.INTERACTIVE_DISPLAY, - ImageRepresentationUseType.THUMBNAIL, - ): - convert_file_reference_to_image_representation( - accession_id, - file_reference_uuid, - use_type, - ) - if accession_id not in accession_ids_with_static_display: - # Get STATIC_DISPLAY - convert_file_reference_to_image_representation( - accession_id, - file_reference_uuid, - ImageRepresentationUseType.STATIC_DISPLAY, - ) - accession_ids_with_static_display.add(accession_id) - - -@app.command() -def propose( - accession_ids: Annotated[ - List[str], typer.Option("--accession-ids", "-a", help="Accession ID(s).") - ] = None, - accession_ids_path: Annotated[ - Path, - typer.Option( - "--accession-ids-path", - "-p", - exists=True, - help="Path to a file containing accession IDs one per line.", - ), - ] = None, - max_items: Annotated[int, typer.Option()] = 5, - output_path: Annotated[Path, typer.Option()] = None, - append: Annotated[bool, typer.Option("--append/--no-append")] = True, -): - """Propose images to convert""" - - # TODO: Make this output yaml in form of bia-converter - # TODO: Write test - - # Get accession IDs - validate_propose_inputs(accession_ids, accession_ids_path) - if accession_ids_path: - accession_ids = [a for a in 
accession_ids_path.read_text().strip().split("\n")] - - if not output_path: - output_path = Path(__file__).parent.parent / "file_references_to_convert.tsv" - if output_path.exists(): - assert output_path.is_file() - if not append: - output_path.unlink() - - for accession_id in accession_ids: - n_lines_written = write_convertible_file_references_for_accession_id( - accession_id, - output_path, - max_items, - append=True, - ) - logger.info( - f"Written {n_lines_written} proposals to {output_path} for {accession_id}" - ) - - -@app.callback() -def main() -> None: - return - - -if __name__ == "__main__": - app() diff --git a/bia-converter-light/bia_converter_light/config.py b/bia-converter-light/bia_converter_light/config.py deleted file mode 100644 index caa3cc1a..00000000 --- a/bia-converter-light/bia_converter_light/config.py +++ /dev/null @@ -1,60 +0,0 @@ -from pathlib import Path -import os -import logging - -from pydantic import Field -from pydantic_settings import BaseSettings, SettingsConfigDict - -from bia_integrator_api.util import get_client_private, get_client - -logger = logging.getLogger("__main__." + __name__) - -default_output_base = ( - f"{Path(os.environ.get('HOME', '')) / '.cache' / 'bia-integrator-data-sm'}" -) - - -class Settings(BaseSettings): - model_config = SettingsConfigDict( - env_file=f"{Path(__file__).parent.parent / '.env'}", - env_file_encoding="utf-8", - case_sensitive=False, - # extra="forbid", - ) - - bia_data_dir: str = Field(default_output_base) - endpoint_url: str = Field("https://uk1s3.embassy.ebi.ac.uk") - bucket_name: str = Field("bia-integrator-data") - cache_root_dirpath: Path = Field(Path.home() / ".cache" / "bia-converter") - bioformats2raw_java_home: str = Field("") - bioformats2raw_bin: str = Field("") - bia_api_basepath: str = Field( - "http://localhost:8080", json_schema_extra={"env": "BIA_API_BASEPATH"} - ) - bia_api_username: str = Field( - "test@example.com", json_schema_extra={"env": "BIA_API_USERNAME"} - ) - bia_api_password: str = Field("test", json_schema_extra={"env": "BIA_API_PASSWORD"}) - - -settings = Settings() - -try: - api_client = get_client_private( - username=settings.bia_api_username, - password=settings.bia_api_password, - api_base_url=settings.bia_api_basepath, - ) -except Exception as e: - message = f"Could not initialise private api_client: {e}" - logger.warning(message) - api_client = None - -try: - read_only_client = get_client( - api_base_url=settings.bia_api_basepath, - ) -except Exception as e: - message = f"Could not initialise public api_client: {e}" - logger.warning(message) - api_client = None diff --git a/bia-converter-light/bia_converter_light/conversion.py b/bia-converter-light/bia_converter_light/conversion.py deleted file mode 100644 index 91363810..00000000 --- a/bia-converter-light/bia_converter_light/conversion.py +++ /dev/null @@ -1,219 +0,0 @@ -# All code in this module originate from bia-converter/bia_converter/io.py -import logging -import subprocess -from uuid import UUID -from pathlib import Path - - -from bia_converter_light.config import settings, api_client -from bia_converter_light.io import stage_fileref_and_get_fpath, copy_local_to_s3 -from bia_converter_light import utils -from bia_shared_datamodels import bia_data_model, semantic_models -from bia_assign_image import image_representation -from bia_converter_light.rendering import generate_padded_thumbnail_from_ngff_uri -from bia_converter_light.utils import save_to_api - -logger = logging.getLogger(__name__) -DEFAULT_PAGE_SIZE = 10000 - - -def 
run_zarr_conversion(input_fpath, output_dirpath): - """Convert the local file at input_fpath to Zarr format, in a directory specified by - output_dirpath""" - - zarr_cmd = f'export JAVA_HOME={settings.bioformats2raw_java_home} && {settings.bioformats2raw_bin} "{input_fpath}" "{output_dirpath}"' - - logger.info(f"Converting with {zarr_cmd}") - - retval = subprocess.run( - zarr_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) - assert ( - retval.returncode == 0 - ), f"Error converting to zarr: {retval.stderr.decode('utf-8')}" - - -def cached_convert_to_zarr_and_get_fpath(representation, input_fpath): - zarr_fpath = get_local_path_to_zarr(representation.uuid) - dst_dir_basepath = zarr_fpath.parent - dst_dir_basepath.mkdir(exist_ok=True, parents=True) - - if not zarr_fpath.exists(): - run_zarr_conversion(input_fpath, zarr_fpath) - - return zarr_fpath - - -def get_local_path_to_zarr(image_representation_uuid: str | UUID) -> Path: - return ( - settings.cache_root_dirpath / "zarr" / f"{image_representation_uuid}.ome.zarr" - ) - - -def convert_to_zarr( - accession_id: str, - file_reference: bia_data_model.FileReference, - image: bia_data_model.Image, -) -> bia_data_model.ImageRepresentation: - """Create zarr image of file reference""" - - local_path_to_uploaded_by_submitter_rep = stage_fileref_and_get_fpath( - file_reference - ) - - # Check if representation already exists -> update. Otherwise, create. - all_representations_for_image = api_client.get_image_representation_linking_image( - str(image.uuid), page_size=DEFAULT_PAGE_SIZE - ) - representations = [ - r - for r in all_representations_for_image - if r.use_type == semantic_models.ImageRepresentationUseType.INTERACTIVE_DISPLAY - ] - n_representations = len(representations) - assert ( - n_representations < 2 - ), f"Expected one interactive display to be associated with image {image.uuid}. Got {n_representations}: {representations}. Not sure what to do!!!" - if n_representations == 1: - representation = representations[0] - else: - representation = image_representation.get_image_representation( - accession_id, - [ - file_reference, - ], - image, - semantic_models.ImageRepresentationUseType.INTERACTIVE_DISPLAY, - ) - local_path_to_zarr = cached_convert_to_zarr_and_get_fpath( - representation, - local_path_to_uploaded_by_submitter_rep, - ) - pixel_metadata = utils.get_ome_zarr_pixel_metadata(str(local_path_to_zarr)) - - # When converting for SAB in August 2024, some images returned tuples in metadata for XYZCT. 
- def _format_pixel_metadata(key): - value = pixel_metadata.pop(key, None) - if isinstance(value, tuple): - value = value[0] - if isinstance(value, str): - value = int(value) - return value - - representation.size_x = _format_pixel_metadata("SizeX") - representation.size_y = _format_pixel_metadata("SizeY") - representation.size_z = _format_pixel_metadata("SizeZ") - representation.size_c = _format_pixel_metadata("SizeC") - representation.size_t = _format_pixel_metadata("SizeT") - - attributes_from_ome = { - "name": "attributes_from_bioformat2raw_conversion", - "provenance": semantic_models.AttributeProvenance.bia_conversion, - "value": pixel_metadata, - } - representation.attribute.append( - semantic_models.Attribute.model_validate(attributes_from_ome) - ) - - representation.image_format = ".ome.zarr" - file_uri = copy_local_to_s3( - local_path_to_zarr, - utils.create_s3_uri_suffix_for_image_representation( - accession_id, representation - ), - ) - representation.file_uri = [ - file_uri + "/0", - ] - save_to_api( - [ - representation, - ] - ) - message = f"Converted uploaded by submitter to ome.zarr and uploaded to S3: {representation.file_uri}" - logger.info(message) - - return representation - - -def convert_to_png( - accession_id: str, - file_reference: bia_data_model.FileReference, - image: bia_data_model.Image, - use_type: semantic_models.ImageRepresentationUseType, -) -> bia_data_model.ImageRepresentation: - """Create png image of file reference""" - - # Check for interactive display representation (ome.zarr) - # This has to exist before we can generate thumbnails/static display - all_representations_for_image = api_client.get_image_representation_linking_image( - str(image.uuid), page_size=DEFAULT_PAGE_SIZE - ) - representations = [ - r - for r in all_representations_for_image - if r.use_type == semantic_models.ImageRepresentationUseType.INTERACTIVE_DISPLAY - ] - n_representations = len(representations) - assert ( - n_representations == 1 - ), f"Need exactly one interactive display to be associated with image {image.uuid}. For generation of {use_type.value} representation. Got {n_representations}: {representations}." - interactive_image_representation = representations[0] - - # Check for local path to zarr and use if it exists - local_path_to_zarr = get_local_path_to_zarr(interactive_image_representation.uuid) - if local_path_to_zarr.exists(): - source_uri = f"{local_path_to_zarr / '0'}" - logger.info( - f"Cached version of required ome.zarr exists locally at {source_uri}. Using this instead of S3 version" - ) - else: - source_uri = interactive_image_representation.file_uri[0] - logger.info( - f"No cached version of required ome.zarr exists locally. 
Using {source_uri}" - ) - - # create image - if use_type == semantic_models.ImageRepresentationUseType.THUMBNAIL: - dims = (256, 256) - else: - dims = (512, 512) - - representation = image_representation.get_image_representation( - accession_id, - [ - file_reference, - ], - image, - use_type, - ) - created_image = generate_padded_thumbnail_from_ngff_uri(source_uri, dims=dims) - created_image_path = utils.get_local_path_for_representation( - representation.uuid, ".png" - ) - with created_image_path.open("wb") as fh: - created_image.save(fh) - logger.info( - f"Saved {representation.use_type} representation to {created_image_path}" - ) - - # upload to s3 - representation.image_format = ".png" - s3_uri = utils.create_s3_uri_suffix_for_image_representation( - accession_id, representation - ) - file_uri = copy_local_to_s3(created_image_path, s3_uri) - - # update representation - representation.file_uri = [ - file_uri, - ] - save_to_api( - [ - representation, - ] - ) - message = f"Created {representation.use_type} image and uploaded to S3: {representation.file_uri}" - logger.info(message) - - return representation diff --git a/bia-converter-light/bia_converter_light/io.py b/bia-converter-light/bia_converter_light/io.py deleted file mode 100644 index a3a5ee44..00000000 --- a/bia-converter-light/bia_converter_light/io.py +++ /dev/null @@ -1,119 +0,0 @@ -# All code in this module originate from bia-converter/bia_converter/io.py - -from pathlib import Path -import urllib -import logging -import shutil -import subprocess - -import requests - -from .config import settings -from .utils import get_image_extension - - -logger = logging.getLogger(__name__) - - -def upload_dirpath_as_zarr_image_rep(src_dirpath, accession_id, image_id): - dst_prefix = f"{settings.bucket_name}/{accession_id}/{image_id}/{image_id}.zarr" - logger.info(f"Uploading with prefix {dst_prefix}") - cmd = f'aws --region us-east-1 --endpoint-url {settings.endpoint_url} s3 sync "{src_dirpath}/" s3://{dst_prefix} --acl public-read' - logger.info(f"Uploading using command {cmd}") - subprocess.run(cmd, shell=True) - - uri = f"{settings.endpoint_url}/{settings.bucket_name}/{accession_id}/{image_id}/{image_id}.zarr" - - return uri - - -def copy_uri_to_local(src_uri: str, dst_fpath: Path): - """Copy the object at the given source URI to the local path specified by dst_fpath.""" - - logger.info(f"Fetching {src_uri} to {dst_fpath}") - - with requests.get(src_uri, stream=True) as r: - r.raise_for_status() - with open(dst_fpath, "wb") as fh: - shutil.copyfileobj(r.raw, fh) - - -def copy_local_to_s3(src_fpath: Path, dst_key: str) -> str: - """Copy the local file with the given path to the S3 location for which the endpoint - and bucket are described in the global Config object, and the destination key is - passed as an argument. - - Returns: URI of uploaded object.""" - - endpoint_url = settings.endpoint_url - bucket_name = settings.bucket_name - - recursive = "--recursive" if src_fpath.is_dir() else "" - cmd = f"aws --region us-east-1 --endpoint-url {settings.endpoint_url} s3 cp {recursive} {src_fpath} s3://{bucket_name}/{dst_key} --acl public-read" - logger.info(f"Uploading {src_fpath} to {dst_key}") - subprocess.run(cmd, shell=True) - - return f"{endpoint_url}/{bucket_name}/{dst_key}" - - -def fetch_fileref_to_local(fileref, dst_fpath, max_retries=3): - # TODO: Clarify if 'format' represents old 'type' e.g. fire_object, file_in_zip etc. 
- # if fileref.type == "file_in_zip": - if fileref.format == "file_in_zip": - raise NotImplementedError - else: - # Ensure uri is encoded - fileref_uri = urllib.parse.quote(fileref.uri, safe="/:") - # Check size after download and retry if necessary; fall back to a HEAD - # request for the expected size when size_in_bytes is 0 - expected_size = ( - int(requests.head(fileref_uri).headers["content-length"]) - if fileref.size_in_bytes == 0 - else fileref.size_in_bytes - ) - for attempt in range(1, max_retries + 1): - try: - copy_uri_to_local(fileref_uri, dst_fpath) - download_size = dst_fpath.stat().st_size - if download_size == expected_size: - break - - logger.warning( - f"Download attempt {attempt} did not give expected size. Got {download_size} expected {expected_size}" - ) - if attempt >= max_retries: - raise Exception( - f"{attempt} download attempt(s) did not give expected size. Got {download_size} expected {expected_size}. Maximum retries reached" - ) - except requests.exceptions.HTTPError as download_error: - if attempt >= max_retries: - logger.error( - f"Download attempt {attempt} resulted in error: {download_error} - exiting" - ) - raise download_error - - -# ToDo add max_retries as parameter to function definition -def stage_fileref_and_get_fpath(fileref) -> Path: - cache_dirpath = settings.cache_root_dirpath / "files" - cache_dirpath.mkdir(exist_ok=True, parents=True) - - # suffix = Path(urlparse(fileref.file_path).path).suffix - suffix = get_image_extension(fileref.file_path) - dst_fname = f"{fileref.uuid}{suffix}" - dst_fpath = cache_dirpath / dst_fname - logger.info(f"Checking cache for {fileref.file_path}") - - if not dst_fpath.exists(): - logger.info(f"File not in cache. Downloading file to {dst_fpath}") - fetch_fileref_to_local(fileref, dst_fpath) - elif dst_fpath.stat().st_size != fileref.size_in_bytes: - # ToDo: As of 04/12/2023 filerefs for type file_in_zip have size_in_bytes=0 - # Need to modify index_from_zips to get filesize info - logger.info( - f"File in cache with size {dst_fpath.stat().st_size}. Expected size={fileref.size_in_bytes}. 
Downloading again to {dst_fpath}" - ) - fetch_fileref_to_local(fileref, dst_fpath) - else: - logger.info(f"File exists at {dst_fpath}") - - return dst_fpath diff --git a/bia-converter-light/bia_converter_light/omezarrmeta.py b/bia-converter-light/bia_converter_light/omezarrmeta.py deleted file mode 100644 index 4a2aa109..00000000 --- a/bia-converter-light/bia_converter_light/omezarrmeta.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import List, Optional - -from pydantic import BaseModel - - -class RDefs(BaseModel): - defaultT: int - model: str - defaultZ: int - - -class Window(BaseModel): - min: float - max: float - start: float - end: float - - -class Channel(BaseModel): - color: str - coefficient: int - active: bool - label: str - window: Window - family: Optional[str] = None - inverted: Optional[bool] = None - - -class Omero(BaseModel): - rdefs: RDefs - channels: List[Channel] - - -class CoordinateTransformation(BaseModel): - scale: List[float] - type: str - - -class DataSet(BaseModel): - path: str - coordinateTransformations: Optional[List[CoordinateTransformation]] = None - - -class MSMetadata(BaseModel): - method: str - version: str - - -class Axis(BaseModel): - name: str - type: str - unit: Optional[str] = None - - -class MultiScaleImage(BaseModel): - datasets: List[DataSet] - metadata: Optional[MSMetadata] = None - axes: Optional[List[Axis]] = None - version: str - - -class Column(BaseModel): - name: str - - -class Row(BaseModel): - name: str - - -class Well(BaseModel): - columnIndex: int - path: str - rowIndex: int - - -class Plate(BaseModel): - columns: List[Column] - rows: List[Row] - wells: List[Well] - version: str - - -class ZMeta(BaseModel): - omero: Optional[Omero] = None - multiscales: Optional[List[MultiScaleImage]] = [] - plates: Optional[Plate] = None diff --git a/bia-converter-light/bia_converter_light/propose_utils.py b/bia-converter-light/bia_converter_light/propose_utils.py deleted file mode 100644 index dae3fe42..00000000 --- a/bia-converter-light/bia_converter_light/propose_utils.py +++ /dev/null @@ -1,160 +0,0 @@ -"""Functions to allow proposing images to convert - -Propose file references to convert by sorting based on size, -partitioning into n groups and randomly selecting one -file reference from each group -""" - -import math -import random -from typing import List, Dict -from pathlib import Path -from bia_converter_light.config import read_only_client -from bia_converter_light.utils import in_bioformats_single_file_formats_list - - -def select_indicies(n_indicies: int, n_to_select: int = 5) -> list[int]: - """Select a number of indicies from input list - - Select a number of indicies from input list. Split list into - n_to_select chunks and randomly select an index from each chunk - """ - - # Seed to allow reproducibility on repeated runs. 
- # Note: Only applies to selections after 23/12/2024 - random.seed(42) - - if n_indicies <= n_to_select: - return list(range(n_indicies)) - - min_per_chunk = math.floor(n_indicies / n_to_select) - remainder = n_indicies % n_to_select - selected_indicies = [] - stop = -1 - for i in range(n_to_select): - n_per_chunk = min_per_chunk - if remainder > 0 and i < remainder: - n_per_chunk += 1 - start = stop + 1 - stop = start + n_per_chunk - 1 - selected_index = random.randint(start, stop) - selected_indicies.append(selected_index) - return selected_indicies - - -def count_lines(file_path): - with open(file_path, "r") as file: - return sum(1 for _ in file) - - -def read_specific_line(file_path, line_number): - with open(file_path, "r") as file: - for current_line_number, line in enumerate(file, start=0): - if current_line_number == line_number: - return line # .strip() - return None # If the line number is beyond the end of the file - - -def sizeof_fmt(num, suffix="B"): - for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: - if abs(num) < 1024.0: - return f"{num:3.1f}{unit}{suffix}" - num /= 1024.0 - return f"{num:.1f}Yi{suffix}" - - -def get_convertible_file_references(accession_id: str) -> List[Dict]: - """Get details of convertible images for given accession ID""" - - # ToDo: Fix this to recursively call using until all data returned - PAGE_SIZE_DEFAULT = 10000000 - - study = read_only_client.search_study_by_accession(accession_id) - if not study: - return [] - datasets = read_only_client.get_dataset_linking_study( - study.uuid, page_size=PAGE_SIZE_DEFAULT - ) - file_references = [] - for dataset in datasets: - file_references.extend( - read_only_client.get_file_reference_linking_dataset( - dataset.uuid, PAGE_SIZE_DEFAULT - ) - ) - - convertible_file_references = [ - { - "accession_id": accession_id, - "study_uuid": study.uuid, - "name": fr.file_path, - "uuid": fr.uuid, - "size_in_bytes": fr.size_in_bytes, - "size_human_readable": sizeof_fmt(fr.size_in_bytes), - } - for fr in file_references - if in_bioformats_single_file_formats_list(fr.file_path) - ] - - convertible_file_references = sorted( - convertible_file_references, - key=lambda fr: (fr["size_in_bytes"], fr["name"]), - reverse=True, - ) - return convertible_file_references - - -def write_convertible_file_references_for_accession_id( - accession_id: str, - output_path: Path, - max_items: int = 5, - append: bool = True, -) -> int: - """ - Write details of file references proposed for conversion to file - """ - - convertible_file_references = get_convertible_file_references(accession_id) - - n_proposal_candidates = len(convertible_file_references) - indicies_to_select = select_indicies(n_proposal_candidates, max_items) - - if append: - open_text_mode = "a" - else: - open_text_mode = "w" - - lines = [ - "\t".join( - [ - convertible_file_references[i]["accession_id"], - f"{convertible_file_references[i]['study_uuid']}", - convertible_file_references[i]["name"], - f"{convertible_file_references[i]['uuid']}", - f"{convertible_file_references[i]['size_in_bytes']}", - convertible_file_references[i]["size_human_readable"], - ] - ) - for i in indicies_to_select - ] - with output_path.open(open_text_mode) as fid: - # If we are at start of file write header. 
- if fid.tell() == 0: - fid.writelines( - "\t".join( - [ - "accession_id", - "study_uuid", - "name", - "file_reference_uuid", - "size_in_bytes", - "size_human_readable", - ] - ) - ) - fid.writelines("\n") - fid.writelines("\n".join(lines)) - # Write a new line so next append starts on next line - fid.writelines("\n") - - return len(indicies_to_select) diff --git a/bia-converter-light/bia_converter_light/rendering.py b/bia-converter-light/bia_converter_light/rendering.py deleted file mode 100644 index a21224f6..00000000 --- a/bia-converter-light/bia_converter_light/rendering.py +++ /dev/null @@ -1,487 +0,0 @@ -import logging -from typing import Dict, List, Optional - -import zarr -import numpy as np -import dask.array as da -from PIL import Image, ImageOps -from pydantic import BaseModel -from microfilm.colorify import multichannel_to_rgb -from matplotlib.colors import LinearSegmentedColormap - -from urllib.parse import urlparse -import s3fs - -# from bia_integrator_tools.utils import get_ome_ngff_rep_by_accession_and_image -from .omezarrmeta import ZMeta - -logger = logging.getLogger("__main__." + __name__) - -DEFAULT_COLORS = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]] - - -class ChannelRenderingSettings(BaseModel): - """Rendering settings for a specific channel.""" - - label: Optional[str] = "" - colormap_start: List[float] = [0.0, 0.0, 0.0] - colormap_end: List[float] = None - window_start: Optional[int] = None - window_end: Optional[int] = None - - -class RenderingInfo(BaseModel): - """Rending settings for a whole image.""" - - channel_renders: List[ChannelRenderingSettings] - default_z: Optional[int] = None - default_t: Optional[int] = None - - -class NGFFProxyImage(object): - """Helper class for working with remove NGFF images to allow us to access - size properties of that image, and fetch multiscale data with specific - resolutions.""" - - def __init__(self, uri): - self.uri = uri.rstrip("/0") - self.zgroup = open_zarr_wrapper(self.uri) - self.array_paths = [] - try: - self.zgroup.visititems(self._get_array_paths) - except Exception as e: - print( - f"Exception {e} when trying to get array_paths. 
Setting array_paths to ['0,']" - ) - - if len(self.array_paths) == 0: - self.array_paths = [ - "0", - ] - - self.ngff_metadata = ZMeta.parse_obj(self.zgroup.attrs.asdict()) - self._init_darray() - - def _get_array_paths(self, name, obj): - """Get the paths of groups containing array data""" - if not obj: - return None - if "Array" in obj.__str__(): - self.array_paths.append(name) - elif len(self.array_paths) > 0: - # We terminate once we have array paths for a subgroup to - # prevent recursively traversing groups which may take a while - # especially when a store has a large number of groups - return obj - return None - - # @classmethod - # def from_bia_accession_and_image_ids(cls, accession_id, image_id): - # ome_ngff_rep = get_ome_ngff_rep_by_accession_and_image(accession_id, image_id) - # return cls(ome_ngff_rep.uri) - - def _init_darray(self): - self.darray = dask_array_from_ome_ngff_uri(self.uri, self.array_paths[0]) - - # Try to get axes info from image metadata - if this goes wrong - # or if image is plate well, fallback to old method - try: - axes = self.ngff_metadata.multiscales[0].axes - if axes is not None and len(axes) > 2: - size_t, size_c, size_z, size_y, size_x = (1, 1, 1, 1, 1) - for index, axis in enumerate(axes): - # The conditional statements below could be avoided - # using eval(f"size_{axis.name} = self.darray.shape[index]") - if axis.name == "t": - size_t = self.darray.shape[index] - elif axis.name == "c": - size_c = self.darray.shape[index] - elif axis.name == "z": - size_z = self.darray.shape[index] - elif axis.name == "y": - size_y = self.darray.shape[index] - elif axis.name == "x": - size_x = self.darray.shape[index] - - self.size_t = size_t - self.size_c = size_c - self.size_z = size_z - self.size_y = size_y - self.size_x = size_x - return - else: - raise Exception("NGFF metadata ({axes}) less than 2 entries") - except Exception as e: - message = f"Could not get axes info from NGFF. Message was {e}. 
Falling back to old method" - logger.warning(message) - - # FIXME - this is not a reliable way to determine which dimensions are present in which - # order, we should be parsing the NGFF metadata to do this - - if len(self.darray.shape) == 5: - size_t, size_c, size_z, size_y, size_x = self.darray.shape - elif len(self.darray.shape) == 4: - size_t = 1 - size_c, size_z, size_y, size_x = self.darray.shape - elif len(self.darray.shape) == 3: - size_z, size_y, size_x = self.darray.shape - size_t = 1 - size_c = 1 - elif len(self.darray.shape) == 2: - size_y, size_x = self.darray.shape - size_z = 1 - size_t = 1 - size_c = 1 - else: - raise Exception("Can't handle this array shape") - - self.size_t = size_t - self.size_c = size_c - self.size_z = size_z - self.size_y = size_y - self.size_x = size_x - - def get_dask_array_with_min_dimensions(self, dims): - ydim, xdim = dims - # path_keys = [dataset.path for dataset in self.ngff_metadata.multiscales[0].datasets] - path_keys = self.array_paths - - for path_key in reversed(path_keys): - zarr_array = self.zgroup[path_key] - if len(zarr_array.shape) >= 2: - size_y, size_x = zarr_array.shape[-2:] - else: - raise Exception("Can't handle this array shape") - - if (size_y >= ydim) and (size_x >= xdim): - break - - return da.from_zarr(zarr_array) - - @property - def all_sizes(self): - path_keys = [ - dataset.path for dataset in self.ngff_metadata.multiscales[0].datasets - ] - - for path_key in path_keys: - zarr_array = self.zgroup[path_key] - yield zarr_array.shape - - -class BoundingBox2DRel(BaseModel): - """Bounding box within a plane, described in relative coordniates such that - 1.0 is the full width/height of the plane image.""" - - x: float - y: float - xsize: float - ysize: float - - -class BoundingBox2DAbs(BaseModel): - """Bounding box within a plane, described in absolute coordinates.""" - - x: int - y: int - xsize: int - ysize: int - - -class PlaneRegionSelection(BaseModel): - """A 2D rectangular region.""" - - t: int - z: int - c: int - bb: BoundingBox2DRel - - -class RenderingView(BaseModel): - """A view of a BIAImage that should provide settings to produce a 2D image. - - Used for, e.g., generating thumbnails or example images.""" - - t: int = 0 - z: int = 0 - region: Optional[PlaneRegionSelection] - - channel_rendering: Dict[int, ChannelRenderingSettings] - - -def open_zarr_wrapper(uri): - """Wrapper using s3fs to open a S3 zarr or normal method for file zarr""" - - if uri.startswith("http"): - uri_parts = urlparse(uri) - endpoint_url = f"{uri_parts.scheme}://{uri_parts.netloc}" - s3_bucket = f"s3:/{uri_parts.path}" - fs = s3fs.S3FileSystem(anon=True, client_kwargs={"endpoint_url": endpoint_url}) - return zarr.open(s3fs.S3Map(s3_bucket, s3=fs), mode="r", path=r"/") - else: - return zarr.open(uri) - - -def scale_to_uint8(array): - """Given an input array, convert to uint8, including scaling to fill the - 0-255 range. 
- - Primarily used to convert general numpy arrays into an image rendering - suitable dtype.""" - - scaled = array.astype(np.float32) - - if scaled.max() - scaled.min() == 0: - return np.zeros(array.shape, dtype=np.uint8) - - scaled = 255 * (scaled - scaled.min()) / (scaled.max() - scaled.min()) - - return scaled.astype(np.uint8) - - -def apply_window(array, window_start, window_end): - """Apply a windowing function to the given array, values above or below - the window are clipped to the edges, and the range is scaled to the - window range.""" - - scaled = (array - window_start) / (window_end - window_start) - clipped = np.clip(scaled, 0, 1) - - return clipped - - -def generate_channel_renderings(n_channels): - """Generate a list channel renderings for a number of channels.""" - - threemap_ends = [[1, 0, 0], [0, 1, 0], [0, 0, 1]] - - channel_renderings = { - n: ChannelRenderingSettings(colormap_end=colormap_end) - for n, colormap_end in enumerate(threemap_ends) - } - - return channel_renderings - - -def dask_array_from_ome_ngff_uri(uri, path_key="0"): - """Get a dask array from a specific OME-NGFF uri""" - - # zgroup = zarr.open(uri) - # fs = s3fs.S3FileSystem( - # anon=True, client_kwargs={"endpoint_url": "https://uk1s3.embassy.ebi.ac.uk"} - # ) - zgroup = open_zarr_wrapper(uri) - darray = da.from_zarr(zgroup[path_key]) - - return darray - - -def dask_array_from_ome_ngff_rep(ome_ngff_rep, path_key="0"): - """Get a dask array from an OME-NGFF image representation.""" - - zgroup = open_zarr_wrapper(ome_ngff_rep.uri) - darray = da.from_zarr(zgroup[path_key]) - - return darray - - -def pad_to_target_dims(im, target_dims, fill=(0, 0, 0)): - """Given a PIL Image and a set of target dimensions, pad the image so that - it fits those dimensions.""" - - w, h = im.size - - delta_w = target_dims[0] - w - delta_h = target_dims[1] - h - - padding = ( - delta_w // 2, - delta_h // 2, - delta_w - (delta_w // 2), - delta_h - (delta_h // 2), - ) - padded_im = ImageOps.expand(im, padding, fill=fill) - - return padded_im - - -def select_region_from_dask_array(darray, region): - """Select a single plane from a Dask array, and compute it.""" - - if len(darray.shape) >= 2: - ydim, xdim = darray.shape[-2:] - else: - raise Exception("Can't handle this array shape") - - # Select region - reduce size if number of elements greater than - # N_ELEMENTS_MAX, to avoid OOM errors. 
- # TODO: Discuss using different pyramid levels with MH - N_ELEMENTS_MAX = 16384 * 16384 - ymin = int(region.bb.y * ydim) - ymax = int((region.bb.y + region.bb.ysize) * ydim) - - xmin = int(region.bb.x * xdim) - xmax = int((region.bb.x + region.bb.xsize) * xdim) - n_elements = (ymax - ymin + 1) * (xmax - xmin + 1) - to_truncate = 100 - while n_elements > N_ELEMENTS_MAX: - ymin += to_truncate - ymax -= to_truncate - - xmin += to_truncate - xmax -= to_truncate - - n_elements = (ymax - ymin + 1) * (xmax - xmin + 1) - - if len(darray.shape) == 5: - return darray[region.t, region.c, region.z, ymin:ymax, xmin:xmax].compute() - elif len(darray.shape) == 4: - return darray[region.c, region.z, ymin:ymax, xmin:xmax].compute() - elif len(darray.shape) == 3: - return darray[region.z, ymin:ymax, xmin:xmax].compute() - elif len(darray.shape) == 2: - return darray[ymin:ymax, xmin:xmax].compute() - else: - raise Exception("Can't handle this array shape") - - -def render_multiple_2D_arrays(arrays, colormaps): - """Given a list of 2D arrays and a list of colormaps, apply each colormap - merge into a single 2D RGB image.""" - - imarray, _, _, _ = multichannel_to_rgb(arrays, colormaps) - im = Image.fromarray(scale_to_uint8(imarray)) - - return im - - -DEFAULT_BB = BoundingBox2DRel(x=0, y=0, xsize=1, ysize=1) - - -def render_proxy_image( - proxy_im, - bbrel=DEFAULT_BB, - dims=(512, 512), - t=None, - z=None, - csettings=None, - mode=None, -): - """In order to render a 2D plane we need to: - - 1. Lazy-load the image as a Dask array. - 2. Select the plane (single t and z values) we'll use. - 3. Separate channels. - 4. Apply a color map to each channel array. - 5. Merge the channel arrays.""" - - ydim, xdim = dims - - min_ydim_needed = ydim / bbrel.ysize - min_xdim_needed = xdim / bbrel.xsize - - darray = proxy_im.get_dask_array_with_min_dimensions( - (min_xdim_needed, min_ydim_needed) - ) - - if not t: - t = proxy_im.size_t // 2 - if not z: - z = proxy_im.size_z // 2 - - channels_to_render = min(proxy_im.size_c, len(DEFAULT_COLORS)) - if not mode: - if channels_to_render == 1: - mode = "grayscale" - elif channels_to_render == 3: - mode = "RGB" - else: - mode = "channels" - - if not csettings: - if mode == "grayscale": - csettings = { - n: ChannelRenderingSettings(colormap_end=[1, 1, 1]) - for n in range(channels_to_render) - } - else: - csettings = { - n: ChannelRenderingSettings(colormap_end=DEFAULT_COLORS[n]) - for n in range(channels_to_render) - } - - region_per_channel = { - c: PlaneRegionSelection(t=t, z=z, c=c, bb=bbrel) - for c in range(channels_to_render) - } - - channel_arrays = { - c: select_region_from_dask_array(darray, region) - for c, region in region_per_channel.items() - } - - for c, channel_array in channel_arrays.items(): - if csettings[c].window_end: - windowed_array = apply_window( - channel_array, csettings[c].window_start, csettings[c].window_end - ) - channel_arrays[c] = windowed_array - - # ToDo: Discuss whether to create global DEFAULT_COLORMAPS constant - # so we do not call create_linear_cmap_dict too many times... 
- # OR use hex values: https://stackoverflow.com/questions/38147997/how-to-change-a-linearsegmentedcolormap-to-a-different-distribution-of-color - colormaps = { - c: LinearSegmentedColormap( - f"n{n}", create_linear_cmap_dict([0, 0, 0], csetting.colormap_end) - ) - for n, (c, csetting) in enumerate(csettings.items()) - } - - im = render_multiple_2D_arrays(channel_arrays.values(), list(colormaps.values())) - - return im - - -def generate_padded_thumbnail_from_ngff_uri( - ngff_uri, dims=(256, 256), autocontrast=True -): - """Given a NGFF URI, generate a 2D thumbnail of the given dimensions.""" - - proxy_im = NGFFProxyImage(str(ngff_uri)) - - im = render_proxy_image(proxy_im) - im.thumbnail(dims) - im_rgb = im.convert("RGB") - - if autocontrast: - cim = ImageOps.autocontrast(im_rgb, (0, 1)) - else: - cim = im_rgb - - padded = pad_to_target_dims(cim, dims) - - return padded - - -def create_linear_cmap_dict( - start_rgb=[ - 0.0, - 0.0, - 0.0, - ], - end_rgb=[ - 1.0, - 1.0, - 1.0, - ], -): - """Return a colormap dict for 'segmentedData' of LinearSegmentedColormap""" - cdict = { - "red": None, - "green": None, - "blue": None, - } - for i, key in enumerate(cdict.keys()): - cdict[key] = [(0.0, start_rgb[i], start_rgb[i]), (1.0, end_rgb[i], end_rgb[i])] - - return cdict diff --git a/bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt b/bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt deleted file mode 100644 index 2e7dac28..00000000 --- a/bia-converter-light/bia_converter_light/resources/bioformats_curated_file_formats_readme.txt +++ /dev/null @@ -1,14 +0,0 @@ -Curated files last updated 21/05/2024 - -Notes on creating the curated files: - -1. Copy supported formats from https://bio-formats.readthedocs.io/en/stable/supported-formats.html -2. Paste into spreadsheet or text editor and get all extensions -3. Ensure extensions are unique and sorted -4. Manually curate into 'bioformats_curated_single_file_formats.txt' Which have 1-2-1 conversion with bioformats2raw and bioformats_curated_other_file_formats.txt which require more input for conversion (e.g. pattern files) - -The above steps can be accomplished in a browser developer console using the following js snippet (thanks to LA): - -[...new Set(Array.from(document.getElementsByTagName("tbody")[0].querySelectorAll("td:nth-child(2)")).map(el => el.innerText.split(",")).flat().filter(extension => extension.length))].sort() - -TODO: write python version of js snippet e.g. 
using selinium diff --git a/bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt b/bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt deleted file mode 100644 index d311ed78..00000000 --- a/bia-converter-light/bia_converter_light/resources/bioformats_curated_other_file_formats.txt +++ /dev/null @@ -1,66 +0,0 @@ -.afi -.apl -.avi -.c01 -.cfg -.csv -.dat -.db -.dcm -.dib -.dicom -.dv -.exp -.flex -.h5 -.hdr -.hed -.htd -.html -.ics -.ids -.l2d -.labels -.lei -.mdb -.mea -.mov -.mtb -.mvd2 -.nd -.ndpis -.nhdr -.nii.gz -.nrrd -.obf -.obsep -.oib -.oif -.ome -.ome.btf -.ome.tf2 -.ome.tf8 -.ome.tif -.ome.tiff -.ome.xml -.omp2info -.par -.pcoraw -.pds -.pic -.pnl -.r3d -.rcpnl -.res -.spc -.stk -.tnb -.txt -.vff -.vms -.vsi -.vws -.wpi -.xdce -.xml -.xys diff --git a/bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt b/bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt deleted file mode 100644 index 4c5918a8..00000000 --- a/bia-converter-light/bia_converter_light/resources/bioformats_curated_single_file_formats.txt +++ /dev/null @@ -1,120 +0,0 @@ -.1sc -.2fl -.acff -.afm -.aim -.al3d -.ali -.am -.amiramesh -.arf -.bif -.bin -.bip -.bmp -.btf -.ch5 -.cif -.cr2 -.crw -.cxd -.czi -.dm2 -.dm3 -.dm4 -.dti -.eps -.epsi -.fdf -.fff -.ffr -.fits -.fli -.frm -.gel -.gif -.grey -.hdf -.his -.hx -.i2i -.im3 -.img -.ims -.inr -.ipl -.ipm -.ipw -.j2k -.jp2 -.jpeg -.jpf -.jpg -.jpk -.jpx -.klb -.lif -.liff -.lim -.lms -.lof -.lsm -.map -.mnc -.mng -.mod -.mrc -.mrcs -.mrw -.msr -.naf -.nd2 -.ndpi -.nef -.nii -.oir -.pbm -.pcx -.pgm -.pict -.png -.ppm -.pr3 -.ps -.psd -.qptiff -.raw -.rec -.scn -.sdt -.seq -.sif -.sld -.sldy -.sm2 -.sm3 -.spe -.spi -.st -.stp -.svs -.sxm -.tf2 -.tf8 -.tfr -.tga -.tif -.tiff -tiff -.top -.v -.wat -.wav -.wlz -.xlef -.xqd -.xqf -.xv -.zfp -.zfr -.zvi diff --git a/bia-converter-light/bia_converter_light/utils.py b/bia-converter-light/bia_converter_light/utils.py deleted file mode 100644 index 7e08acbf..00000000 --- a/bia-converter-light/bia_converter_light/utils.py +++ /dev/null @@ -1,231 +0,0 @@ -from pydantic import BaseModel -from uuid import UUID -from pathlib import Path -from typing import Dict, List -import logging -import xml.etree.ElementTree as ET - -from pydantic.alias_generators import to_snake - -from bia_integrator_api.exceptions import NotFoundException -import bia_integrator_api.models as api_models -from bia_shared_datamodels import bia_data_model -from bia_converter_light.config import settings, api_client - -logger = logging.getLogger("__main__." + __name__) - - -def get_total_zarr_size(zarr_path: str) -> int: - """Return size of zarr archive in bytes""" - - # Assume the zarr store is a local disk - # TODO: Generalise for any uri (including file:// and s3://) - # TODO: so the argument name for this func should be 'zarr_uri' - zarr_path = Path(zarr_path) - return ( - sum(f.stat().st_size for f in zarr_path.rglob("*")) + zarr_path.stat().st_size - ) - - -single_file_formats_path = ( - Path(__file__).parent / "resources" / "bioformats_curated_single_file_formats.txt" -) -single_file_formats = [ - s for s in single_file_formats_path.read_text().split("\n") if len(s) > 0 -] - - -def extension_in_bioformats_single_file_formats_list(ext: str) -> bool: - if len(ext) > 1 and not ext[0] == ".": - ext = "." 
+ ext - return ext in single_file_formats - - -def in_bioformats_single_file_formats_list(file_location: [Path | str]) -> bool: - """Check if ext of path/uri/name of file in bioformats single file formats list""" - ext = get_image_extension(f"{file_location}") - return extension_in_bioformats_single_file_formats_list(ext) - - -def get_ome_zarr_pixel_metadata(zarr_location: str) -> dict: - """Return pixel metadata entry of METADATA.ome.xml of bioformats2raw zarr""" - - # This function assumes the zarr has been created at specified location - # on disk and was produced by bioformats2raw -> OME/METADATA.ome.xml - # exists - # - # TODO: handle general uris e.g. https://, s3:// or file:// - metadata_path = Path(zarr_location) / "OME" / "METADATA.ome.xml" - if metadata_path.is_file(): - metadata = parse_xml_string(metadata_path.read_text()) - try: - image_metadata = metadata[ - "{http://www.openmicroscopy.org/Schemas/OME/2016-06}OME" - ][r"{http://www.openmicroscopy.org/Schemas/OME/2016-06}Image"] - - # Multichannel images may have a list - use first element - # TODO: Discuss with team what to do in this case - if isinstance(image_metadata, list): - image_metadata = image_metadata[0] - - pixel_metadata = image_metadata[ - "{http://www.openmicroscopy.org/Schemas/OME/2016-06}Pixels" - ] - except KeyError: - pixel_metadata = {} - else: - pixel_metadata = {} - - return pixel_metadata - - -# TODO: discuss replacing this function with something from either -# ome_zarr or ome_zarr_metadata -def parse_xml_string(xml_string: str) -> dict: - """ - Parse an XML string and convert it into a dictionary. - - This is intended to be used for OME/METADATA.ome.xml created - by bioformats2raw. - """ - - def _xml_to_dict(element: ET) -> dict: - """ - Convert XML element and children to a dict, including attributes. 
- """ - # Initialize the dictionary to store the element's data - result = {} - - # Include attributes in the result if they exist - if element.attrib: - result.update({k: v for k, v in element.attrib.items()}) - - # If the element has no children, return its text or result if there are attributes - if len(element) == 0: - return element.text if not result else result - - # Iterate over the children of the element - for child in element: - child_dict = _xml_to_dict(child) - - # Handle duplicate tags by storing them as a list - if child.tag in result: - if isinstance(result[child.tag], list): - result[child.tag].append(child_dict) - else: - result[child.tag] = [result[child.tag], child_dict] - else: - result[child.tag] = child_dict - - return result - - # Parse the XML string into an ElementTree - root = ET.fromstring(xml_string) - - # Convert the ElementTree into a dictionary - return {root.tag: _xml_to_dict(root)} - - -def create_s3_uri_suffix_for_image_representation( - accession_id: str, representation: bia_data_model.ImageRepresentation -) -> str: - """Create the part of the s3 uri that goes after the bucket name for an image representation""" - - assert representation.image_format and len(representation.image_format) > 0 - assert isinstance(representation.representation_of_uuid, UUID) or isinstance( - UUID(representation.representation_of_uuid), UUID - ) - return f"{accession_id}/{representation.representation_of_uuid}/{representation.uuid}{representation.image_format}" - - -def get_local_path_for_representation(uuid: [str | UUID], image_format: str) -> Path: - """Return path to local cache for this image representation""" - - if not image_format.startswith("."): - image_format = f".{image_format}" - cache_dirpath = settings.cache_root_dirpath / "other_converted_images" - cache_dirpath.mkdir(exist_ok=True, parents=True) - return cache_dirpath / f"{uuid}{image_format}" - - -def get_image_extension(file_path: str) -> str: - """Return standardized image extension for a given file path.""" - - # Process files with multi suffix extensions - multi_suffix_ext = { - ".ome.zarr.zip": ".ome.zarr.zip", - ".zarr.zip": ".zarr.zip", - ".ome.zarr": ".ome.zarr", - ".ome.tiff": ".ome.tiff", - ".ome.tif": ".ome.tiff", - ".tar.gz": ".tar.gz", - } - - for ext, mapped_value in multi_suffix_ext.items(): - if file_path.lower().endswith(ext): - return mapped_value - - # Standardise extensions expressed using different suffixes - ext_map = { - ".jpeg": ".jpg", - ".tif": ".tiff", - } - - ext = Path(file_path).suffix.lower() - if ext in ext_map: - return ext_map[ext] - else: - return ext - - -def merge_dicts(dict_list: List[Dict[str, str]]) -> Dict: - """Merge list of dicts to one dict. 
Values for repeated keys are put into lists - - Assumes all input dict values are strings as in function type hint - """ - - if not dict_list: - return {} - - merged_dict = dict_list[0] - - for dictionary in dict_list[1:]: - for key, value in dictionary.items(): - # If the key already exists in the merged dictionary - if key in merged_dict: - # If it's not already a list, convert the current value to a list - if not isinstance(merged_dict[key], list): - merged_dict[key] = [merged_dict[key]] - # Append the new value to the list - merged_dict[key].append(value) - else: - # If the key does not exist, add it to the merged dictionary - merged_dict[key] = value - - return merged_dict - - -def save_to_api(object_list: List[BaseModel]) -> None: - """Convert bia_data_model to bia_integrator_api.model and persist to API""" - - for obj in object_list: - api_obj = getattr(api_models, obj.model.type_name).model_validate_json( - obj.model_dump_json() - ) - # First try to retrieve object - try: - api_get_method = f"get_{to_snake(obj.model.type_name)}" - api_copy_of_obj = getattr(api_client, api_get_method)(api_obj.uuid) - except NotFoundException: - api_copy_of_obj = None - - if api_obj == api_copy_of_obj: - message = f"Not writing object with uuid: {obj.uuid} and type: {obj.model.type_name} to API because an identical copy of object exists in API" - logger.warning(message) - continue - elif api_copy_of_obj: - api_obj.version = api_copy_of_obj.version + 1 - - api_creation_method = f"post_{to_snake(obj.model.type_name)}" - getattr(api_client, api_creation_method)(api_obj) - logger.debug(f"persisted {obj.uuid} of type {obj.model.type_name} to API") diff --git a/bia-converter-light/file_references_to_convert.tsv b/bia-converter-light/file_references_to_convert.tsv deleted file mode 100644 index 7184086d..00000000 --- a/bia-converter-light/file_references_to_convert.tsv +++ /dev/null @@ -1,16 +0,0 @@ -accession_id study_uuid name file_reference_uuid size_in_bytes size_human_readable -S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 WT/WT-028.tif b3a888d4-d20c-4ea8-bdee-423a6727c957 522428 510.2KiB -S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 c8-1_set1/c8-1_set1-018.tif 14c72b7e-e05c-4e68-9d30-605e6a62e02d 515312 503.2KiB -S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 c8-1_set1/c8-1_set1-010.tif c178922c-8525-4922-aa2e-90dfcf84e30a 473364 462.3KiB -S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 c8-1_set2/c8-1_set2-026.tif 01f912ae-2919-44c4-bec5-b9c2222a2feb 428224 418.2KiB -S-BIAD1444 cfd7d1f1-d215-4311-9d92-ac77b18e73f6 WT/WT-008.tif b35cc0d0-6560-4cc8-bff2-c0c7a58693a0 406184 396.7KiB -S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.5A-C_N2_miRCon_Sham_n10_t30 min.Project Maximum Z_XYc561.tif 8b307eb1-5b61-4eb5-90c1-454c3bb122ec 524925 512.6KiB -S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.7D-F_N2_CsA_Sham_n9_t20 min.Project Maximum Z_XYc640.tif 26dcc756-1ad5-4b60-a6b5-cd289880ad65 524923 512.6KiB -S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.5A-C_N2_miRCon_Sham_n8_t10 min.Project Maximum Z_XYc488.tif 15ad9853-4813-430e-903b-8e187a75688e 524923 512.6KiB -S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.5A-C_N1_miR153_iLTP_n1_t30 min.Project Maximum Z_XYc488.tif 49fbfaf4-ed84-4d98-a876-799c7efb3cd3 524923 512.6KiB -S-BIAD1266 a1d3488b-a325-4992-911a-2404fb7cd390 Fig.2A-C_N3_miRCon_n40.Project Maximum Z_XYc488.tif aa51e605-e4ac-43a7-9ace-25c2694e1d42 524917 512.6KiB -S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR1482_WSIs/SR1482_40X_HE_T333_02.czi 
f7d4f019-e738-42f9-8687-991b73e71218 6589973632 6.1GiB -S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR1482_WSIs/SR1482_40X_HE_T120_02.czi a57ec624-4dd7-4130-8911-d2777ae27852 5860090976 5.5GiB -S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR386_WSIs/SR386_40X_HE_T211_01.czi 8e436a59-d7a0-4dce-9319-8b96497854b2 4680591136 4.4GiB -S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR386_WSIs/SR386_40X_HE_T081_01.czi ff408246-ce37-4204-9418-7484c61dd534 2315012512 2.2GiB -S-BIAD1285 55ecc13b-5896-434f-893e-88d992f21648 SR1482_WSIs/SR1482_40X_HE_T276_01.czi 9c1b3a3d-7f21-472c-96b8-93d231813564 961088224 916.6MiB diff --git a/bia-converter-light/pyproject.toml b/bia-converter-light/pyproject.toml deleted file mode 100644 index ce6bad62..00000000 --- a/bia-converter-light/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[tool.poetry] -name = "bia-converter-light" -version = "0.1.0" -description = "BIA simple approach to creation of images associated with image representations" -authors = ["Kola Babalola "] -license = "Apache Software License 2.0" -readme = "README.md" -packages = [{include = "bia_converter_light"}] - -[tool.poetry.dependencies] -python = "^3.10,<3.12" -requests = "^2.31.0" -pytest = "^7.0" -bia-shared-datamodels = { path = "../bia-shared-datamodels", develop = true } -bia-integrator-api = { path = "../clients/python", develop = true } -bia-ingest = { path = "../bia-ingest", develop = true } -bia-test-data = { path = "../bia-test-data", develop = true } -bia-assign-image = { path = "../bia-assign-image", develop = true } -typer = "^0.12.3" -typing-extensions = "^4.12.2" -pydantic-settings = "^2.3.4" - -# Dependencies for image conversion and upload to Embassy S3 -zarr = "^2.18.3" -dask = "^2024.8.2" -microfilm = "^0.2.1" -setuptools = "^74.1.2" -fsspec = "^2024.9.0" -s3fs = "^2024.9.0" - -[tool.poetry.scripts] -bia-converter-light = "bia_converter_light.cli:app" - - -[tool.poetry.group.dev.dependencies] -ipython = "^8.22.1" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/bia-converter-light/scripts/print_details_of_convertible_images.py b/bia-converter-light/scripts/print_details_of_convertible_images.py deleted file mode 100644 index 0bc32550..00000000 --- a/bia-converter-light/scripts/print_details_of_convertible_images.py +++ /dev/null @@ -1,70 +0,0 @@ -"""Ad hoc script to select images to convert based on size""" - -import typer -from bia_converter_light.config import api_client -from bia_converter_light.utils import in_bioformats_single_file_formats_list - -PAGE_SIZE_DEFAULT = 10000000 - - -def sizeof_fmt(num, suffix="B"): - for unit in ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"]: - if abs(num) < 1024.0: - return f"{num:3.1f}{unit}{suffix}" - num /= 1024.0 - return f"{num:.1f}Yi{suffix}" - - -app = typer.Typer() - - -def get_details_of_images_that_can_be_converted(accession_id: str): - study = api_client.search_study_by_accession(accession_id) - assert study - datasets = api_client.get_dataset_linking_study( - study.uuid, page_size=PAGE_SIZE_DEFAULT - ) - file_references = [] - for dataset in datasets: - file_references.extend( - api_client.get_file_reference_linking_dataset( - dataset.uuid, PAGE_SIZE_DEFAULT - ) - ) - - convertible_file_references = [ - { - "accession_id": accession_id, - "study_uuid": study.uuid, - "name": fr.file_path, - "uuid": fr.uuid, - "size_in_bytes": fr.size_in_bytes, - "size_human_readable": sizeof_fmt(fr.size_in_bytes), - } - for fr in file_references - if 
in_bioformats_single_file_formats_list(fr.file_path) - ] - - convertible_file_references = sorted( - convertible_file_references, key=lambda fr: fr["size_in_bytes"], reverse=True - ) - return convertible_file_references - - -@app.command() -def print_details_of_convertible_images(accession_id: str): - """ - Print details of images that can be converted. - """ - - convertible_file_references = get_details_of_images_that_can_be_converted( - accession_id - ) - for cfr in convertible_file_references: - print( - f"{cfr['accession_id']}\t{cfr['study_uuid']}\t{cfr['name']}\t{cfr['uuid']}\t{cfr['size_in_bytes']}\t{cfr['size_human_readable']}" - ) - - -if __name__ == "__main__": - app() diff --git a/bia-converter-light/test/data/test_files_for_study_component_2/im06.png b/bia-converter-light/test/data/test_files_for_study_component_2/im06.png deleted file mode 100644 index fd76f6cf6ac1324d3d6440ea781be631de3ef515..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24649 zcmeI)X-JcC0LStFEZS5YYO{0Humg>ll0gSb<(zJmN0+8Wc5K%2XssucN;I#EPKAWT z%8`a$zOlf>4oN%7pe!rV!U{A(E4$Frwimtk=c|g};Mw5eJGOnE-`42uaGT&J zgzzvL!j?H6O^zc*;o`VommEzI9hyqC|m<{2t`Xr)iWj=-y5+)HLp_17NBNv^Ly+IT+qV0D$UDo67}*74zW zd8hBRfdK!@`#-&|ncUMI(%mi_2+u#4)H1hhOL1Y;^t_a`m5S@Wd47F?1=$TtGNV+> zKdKYIfBf`S+f-fm=-0em$LsEi(+h(iNVk03HK*DyOHKU^Ua>{0j_n;wbG3$_cXzya zH>XLz?^kfp2Gir-2CohF=k+#ub&=gW+Ut&L<(WNM&&>I~V>Vq`w(-*L&tnUEU*4b9 zVhJy7@GaJaW*?exzw%~iY3yOuXv@p!j34{dHw@YnYs`VRnPqN1MQ`MTxv$zv^6$CM z@h=s%i7EXX$0@p99@eI2O>Rx^9jOQ&5tq=Km@0x!m$$Sgd{A`08l|mjEvb{Q=@z-s zwI;(6RasYnD*Jm--&u8_)&0xGtJki4+tw#cl^2>ltX9P#mH+OxH*NN&_JNQm?|bAa zN?ZA2Z^_khSzN7#`0YYyr#KH;M#X}3$4R+ljMU3J6>4R`yw&|4w;iX(OZrukZfk0) zIZYBen|ZxtHfeTRq|KTLV`NO+UgcOJG{P9RXhnuh=DD`VE*l&ibUcr@p$Is?AiNDx zfLKF>a%RLL79aqmTo$na0U+hFhy@4$DVIeoKmbU&EMfryK+0ti3lIQOE{j-z0FZK7 z!~z6>l*=L(AONIX7O?;UAmy@%1qc8smqjc<07!?wtf%{vr9Zy~0ABEaNrw;jzX$-I zgIL4@1b~#wA{HP3q+AxU00AK7vWNu;04bM6EIFfB=wk zS;PVafRxK379aqmTo$na0U+hFhy@4$DVOzcU^Ol*=L(AONIX7O?;UAbsaK;^f+sEq>0Y81+$MRiW#0egpnp@Z$gg diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs deleted file mode 100644 index 80da91b3..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zattrs +++ /dev/null @@ -1,3 +0,0 @@ -{ - "bioformats2raw.layout" : 3 -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup deleted file mode 100644 index 03087769..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format" : 2 -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs deleted file mode 100644 index e5c3d03c..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zattrs +++ /dev/null @@ -1,80 +0,0 @@ -{ - "multiscales" : [ { - "metadata" : { - "method" : "loci.common.image.SimpleImageScaler", - "version" : "Bio-Formats 6.13.0" - }, - "axes" : [ { - "name" : "t", - "type" : "time" - }, { - "name" : "c", - "type" : "channel" - }, { - "name" : "z", - "type" : "space" - }, { - "name" : 
"y", - "type" : "space" - }, { - "name" : "x", - "type" : "space" - } ], - "name" : "im06.png", - "datasets" : [ { - "path" : "0", - "coordinateTransformations" : [ { - "scale" : [ 1.0, 1.0, 1.0, 1.0, 1.0 ], - "type" : "scale" - } ] - } ], - "version" : "0.4" - } ], - "omero" : { - "channels" : [ { - "color" : "FF0000", - "coefficient" : 1, - "active" : true, - "label" : "Channel 0", - "window" : { - "min" : 255.0, - "max" : 255.0, - "start" : 255.0, - "end" : 255.0 - }, - "family" : "linear", - "inverted" : false - }, { - "color" : "00FF00", - "coefficient" : 1, - "active" : true, - "label" : "Channel 1", - "window" : { - "min" : 255.0, - "max" : 255.0, - "start" : 255.0, - "end" : 255.0 - }, - "family" : "linear", - "inverted" : false - }, { - "color" : "0000FF", - "coefficient" : 1, - "active" : true, - "label" : "Channel 2", - "window" : { - "min" : 255.0, - "max" : 255.0, - "start" : 255.0, - "end" : 255.0 - }, - "family" : "linear", - "inverted" : false - } ], - "rdefs" : { - "defaultT" : 0, - "model" : "color", - "defaultZ" : 0 - } - } -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup deleted file mode 100644 index 03087769..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format" : 2 -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray deleted file mode 100644 index e03ace41..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/.zarray +++ /dev/null @@ -1,17 +0,0 @@ -{ - "chunks" : [ 1, 1, 1, 80, 100 ], - "compressor" : { - "clevel" : 5, - "blocksize" : 0, - "shuffle" : 1, - "cname" : "lz4", - "id" : "blosc" - }, - "dtype" : "|u1", - "fill_value" : 0, - "filters" : null, - "order" : "C", - "shape" : [ 1, 3, 1, 80, 100 ], - "dimension_separator" : "/", - "zarr_format" : 2 -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/0/0/0/0 b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/0/0/0/0 deleted file mode 100644 index 9b845ff547342afb60e6684e5f956e74755ddb85..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 66 kcmZQ#RAh9JXJ7zfCk6%v5g^tAV)_4!4FB;1w*as_09Irs^#A|> diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/1/0/0/0 b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/1/0/0/0 deleted file mode 100644 index 9b845ff547342afb60e6684e5f956e74755ddb85..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 66 kcmZQ#RAh9JXJ7zfCk6%v5g^tAV)_4!4FB;1w*as_09Irs^#A|> diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/2/0/0/0 b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/0/0/0/2/0/0/0 deleted file mode 100644 index 9b845ff547342afb60e6684e5f956e74755ddb85..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 66 kcmZQ#RAh9JXJ7zfCk6%v5g^tAV)_4!4FB;1w*as_09Irs^#A|> diff --git 
a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs deleted file mode 100644 index 6837bf36..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zattrs +++ /dev/null @@ -1,3 +0,0 @@ -{ - "series" : [ "0" ] -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup deleted file mode 100644 index 03087769..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/.zgroup +++ /dev/null @@ -1,3 +0,0 @@ -{ - "zarr_format" : 2 -} diff --git a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml b/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml deleted file mode 100644 index 5fb45870..00000000 --- a/bia-converter-light/test/data/test_image_representations/study_component1/im06.ome.zarr/OME/METADATA.ome.xml +++ /dev/null @@ -1 +0,0 @@ - diff --git a/bia-converter-light/test/test_cli.py b/bia-converter-light/test/test_cli.py deleted file mode 100644 index 229187e4..00000000 --- a/bia-converter-light/test/test_cli.py +++ /dev/null @@ -1,340 +0,0 @@ -from pathlib import Path -import shutil -import pytest -from typer.testing import CliRunner -from unittest.mock import MagicMock - -from bia_integrator_api import PrivateApi -from bia_shared_datamodels import bia_data_model -from bia_test_data.mock_objects.mock_object_constants import accession_id -from bia_test_data.mock_objects import ( - mock_image_representation, - mock_image, - mock_dataset, - mock_file_reference, -) -from bia_ingest.persistence_strategy import ( - persistence_strategy_factory, - PersistenceStrategy, -) -from bia_converter_light import conversion -from bia_converter_light import cli, utils -from bia_converter_light.config import settings - - -@pytest.fixture -def runner() -> CliRunner: - return CliRunner(mix_stderr=False) - - -@pytest.fixture -def output_dir_base(tmpdir, monkeypatch): - odb = Path(tmpdir) - - monkeypatch.setattr( - settings, - "cache_root_dirpath", - odb / ".cache", - ) - - # Copy file to be convertered to cache so it does not need to be downloaded - file_reference = mock_file_reference.get_file_reference()[0] - src_path = ( - Path(__file__).parent / "data" / "test_files_for_study_component_2" / "im06.png" - ) - dest_dir = odb / ".cache" / "files" - if not dest_dir.is_dir(): - dest_dir.mkdir(parents=True) - dest_path = dest_dir / f"{file_reference.uuid}.png" - shutil.copy(src_path, dest_path) - - return odb - - -@pytest.fixture -def persister(output_dir_base) -> PersistenceStrategy: - persister = persistence_strategy_factory( - persistence_mode="disk", - output_dir_base=str(output_dir_base), - accession_id=accession_id, - ) - return persister - - -@pytest.fixture -def dataset(persister) -> bia_data_model.Dataset: - ds = mock_dataset.get_dataset()[1] - persister.persist( - [ - ds, - ] - ) - - return ds - - -@pytest.fixture -def image(persister) -> bia_data_model.Image: - im = mock_image.get_image_with_one_file_reference() - persister.persist( - [ - im, - ] - ) - - return im - - -@pytest.fixture -def file_reference(persister) -> bia_data_model.FileReference: - file_ref = 
mock_file_reference.get_file_reference()[0] - persister.persist( - [ - file_ref, - ] - ) - - return file_ref - - -@pytest.fixture -def conversion_details_path(output_dir_base, dataset, file_reference) -> Path: - """Write tsv files with details of file references to convert""" - - path_to_conversion_details = output_dir_base / "file_references_to_convert.tsv" - study_uuid = f"{dataset.submitted_in_study_uuid}" - # Get details of file references in study component 2 of mock study - size_human_readable = f"{file_reference.size_in_bytes}B" - conversion_details = "\t".join( - [ - "accession_id", - "study_uuid", - "name", - "file_reference_uuid", - "size_in_bytes", - "size_human_readable", - ] - ) - conversion_details += "\n" - conversion_details += "\t".join( - [ - accession_id, - study_uuid, - file_reference.file_path, - f"{file_reference.uuid}", - f"{file_reference.size_in_bytes}", - size_human_readable, - ] - ) - path_to_conversion_details.write_text(conversion_details) - return path_to_conversion_details - - -@pytest.fixture -def mock_api_client(monkeypatch, persister, output_dir_base): - """Mock api_client functions for getting and saving model objects used in test""" - - def mock_get_image(uuid): - return persister.fetch_by_uuid( - [ - uuid, - ], - bia_data_model.Image, - )[0] - - def mock_get_image_representation(uuid): - return persister.fetch_by_uuid( - [ - uuid, - ], - bia_data_model.ImageRepresentation, - )[0] - - def mock_get_dataset(uuid): - return persister.fetch_by_uuid( - [ - uuid, - ], - bia_data_model.Dataset, - )[0] - - def mock_get_file_reference(uuid): - return persister.fetch_by_uuid( - [ - uuid, - ], - bia_data_model.FileReference, - )[0] - - def mock_get_image_representation_linking_image(image_uuid, page_size): - image_representations_path = output_dir_base / "image_representation" - representation_uuids = [ - p.stem for p in image_representations_path.rglob("*/*.json") - ] - all_image_representations = persister.fetch_by_uuid( - representation_uuids, bia_data_model.ImageRepresentation - ) - - return [ - r - for r in all_image_representations - if f"{r.representation_of_uuid}" == f"{image_uuid}" - ] - - def mock_post_object(obj): - persister.persist( - [ - obj, - ] - ) - - mock_api_client_object = MagicMock() - mock_api_client_object.get_image_representation = mock_get_image_representation - mock_api_client_object.get_dataset = mock_get_dataset - mock_api_client_object.get_file_reference = mock_get_file_reference - mock_api_client_object.get_image = mock_get_image - mock_api_client_object.get_image_representation_linking_image = ( - mock_get_image_representation_linking_image - ) - mock_api_client_object.post_dataset = mock_post_object - mock_api_client_object.post_image_representation = mock_post_object - mock_api_client_object.__class__ = PrivateApi - monkeypatch.setattr( - conversion, - "api_client", - mock_api_client_object, - ) - monkeypatch.setattr( - cli, - "api_client", - mock_api_client_object, - ) - monkeypatch.setattr( - utils, - "api_client", - mock_api_client_object, - ) - return mock_api_client_object - - -@pytest.fixture -def mock_copy_local_to_s3(monkeypatch): - """Return s3 url without actual copy to s3""" - - def _mock_copy_local_to_s3(src_fpath, dst_key): - endpoint_url = settings.endpoint_url - # bucket_name = settings.bucket_name - bucket_name = "test-bucket" - - return f"{endpoint_url}/{bucket_name}/{dst_key}" - - monkeypatch.setattr( - conversion, - "copy_local_to_s3", - _mock_copy_local_to_s3, - ) - - -def test_cli_convert_image( - runner, - 
output_dir_base, - conversion_details_path, - mock_api_client, - image, - file_reference, - mock_copy_local_to_s3, - persister, -): - image_representation = ( - mock_image_representation.get_image_representation_of_interactive_display() - ) - persister.persist( - [ - image_representation, - ] - ) - - result = runner.invoke( - cli.app, - [ - "convert-image", - # "--accession-ids", - # accession_id, - "--conversion-details-path", - conversion_details_path, - ], - catch_exceptions=False, - ) - - assert result.exit_code == 0 - - # Check all zarr and pngs were created - # TODO: Compare all output files vs expected? - for use_type in ( - "thumbnail", - "static_display", - "interactive_display", - ): - func = f"get_image_representation_of_{use_type}" - representation = getattr(mock_image_representation, func)() - - if use_type == "interactive_display": - expected_path = ( - output_dir_base / ".cache" / "zarr" / f"{representation.uuid}.ome.zarr" - ) - assert expected_path.is_dir(), f"Did not find expected dir for {use_type}" - else: - expected_path = ( - output_dir_base - / ".cache" - / "other_converted_images" - / f"{representation.uuid}.png" - ) - assert expected_path.is_file(), f"Did not find expected file for {use_type}" - - # TODO: Check the file_uris of the created image representations properly - created_representation = persister.fetch_by_uuid( - [representation.uuid], - bia_data_model.ImageRepresentation, - )[0] - assert created_representation.file_uri[0].startswith("http") - assert f"{representation.uuid}" in created_representation.file_uri[0] - - -def test_cli_update_example_image_uri_for_dataset( - runner, mock_api_client, output_dir_base, persister, dataset -): - image_representation = ( - mock_image_representation.get_image_representation_of_static_display() - ) - persister.persist( - [ - image_representation, - ] - ) - - bia_image = mock_image.get_image_with_one_file_reference() - persister.persist( - [ - bia_image, - ] - ) - - result = runner.invoke( - cli.app, - [ - "update-example-image-uri-for-dataset", - str(image_representation.uuid), - ], - ) - - assert result.exit_code == 0 - # cli.update_example_image_uri_for_dataset(image_representation.uuid) - modified_dataset = persister.fetch_by_uuid( - [ - dataset.uuid, - ], - bia_data_model.Dataset, - )[0] - assert dataset.example_image_uri == [] - assert modified_dataset.example_image_uri == image_representation.file_uri diff --git a/bia-converter-light/test/test_conversion_util_funcs.py b/bia-converter-light/test/test_conversion_util_funcs.py deleted file mode 100644 index ed378f7d..00000000 --- a/bia-converter-light/test/test_conversion_util_funcs.py +++ /dev/null @@ -1,14 +0,0 @@ -from bia_converter_light import utils - - -def test_merge_dicts(): - dict_list = [ - {"key1": "value1", "key2": "value2"}, - {"key1": "value3", "key4": "value4"}, - ] - - assert utils.merge_dicts(dict_list) == { - "key1": ["value1", "value3"], - "key2": "value2", - "key4": "value4", - } diff --git a/bia-converter-light/test/test_create_thumbnails.py b/bia-converter-light/test/test_create_thumbnails.py deleted file mode 100644 index 55e4f1ce..00000000 --- a/bia-converter-light/test/test_create_thumbnails.py +++ /dev/null @@ -1,16 +0,0 @@ -from pathlib import Path -from bia_converter_light.rendering import generate_padded_thumbnail_from_ngff_uri - - -def test_generate_padded_thumbnail_from_ngff_uri(): - """Test function runs without errors. 
NOT that correct values are produced""" - - local_path_to_zarr = ( - Path(__file__).parent - / "data" - / "test_image_representations" - / "study_component1" - / "im06.ome.zarr" - ) - thumbnail = generate_padded_thumbnail_from_ngff_uri(local_path_to_zarr) - assert thumbnail.size == (256, 256)
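
The curated-formats README deleted above ends with a TODO asking for a Python version of the browser-console JS snippet that harvests extensions from the Bio-Formats supported-formats page. A minimal sketch of what that helper might look like, assuming requests and beautifulsoup4 are available (neither is a dependency of the removed package, and the function name is hypothetical):

# Hypothetical Python counterpart to the JS snippet in
# bioformats_curated_file_formats_readme.txt. Assumes requests and
# beautifulsoup4 are installed; adjust if the page layout changes.
import requests
from bs4 import BeautifulSoup

SUPPORTED_FORMATS_URL = (
    "https://bio-formats.readthedocs.io/en/stable/supported-formats.html"
)


def fetch_bioformats_extensions(url: str = SUPPORTED_FORMATS_URL) -> list[str]:
    """Return unique, sorted extensions from the second column of the formats table."""
    soup = BeautifulSoup(requests.get(url, timeout=30).text, "html.parser")
    extensions = set()
    # First tbody on the page, second td per row, comma-separated extensions,
    # mirroring the JS: querySelectorAll("td:nth-child(2)") ... split(",")
    for row in soup.find("tbody").find_all("tr"):
        cells = row.find_all("td")
        if len(cells) >= 2:
            extensions.update(
                ext.strip() for ext in cells[1].get_text().split(",") if ext.strip()
            )
    return sorted(extensions)


if __name__ == "__main__":
    print("\n".join(fetch_bioformats_extensions()))

The output would still need the manual split into the single-file and other curated lists described in step 4 of that README.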
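
Similarly, utils.get_total_zarr_size (also removed above) carries a TODO to generalise from a local path to any URI (file://, s3://, ...). Since fsspec and s3fs were already declared in the deleted pyproject.toml, one hedged sketch of that generalisation, with a hypothetical function name, could be:

# Hypothetical generalisation of get_total_zarr_size to arbitrary URIs.
# fsspec/s3fs were already dependencies of the removed package; the function
# name and behaviour here are illustrative, not the project's API.
import fsspec


def get_total_zarr_size_from_uri(zarr_uri: str) -> int:
    """Return the total size in bytes of all objects under a zarr store URI."""
    fs, root = fsspec.core.url_to_fs(zarr_uri)
    listing = fs.find(root, detail=True)  # maps path -> info dict for every file
    return sum(info.get("size", 0) for info in listing.values())

For a local store this gives essentially the same number as the original implementation (minus the size of the top-level directory entry itself); for s3:// URIs it requires s3fs and the usual AWS credentials in the environment.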